1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCV.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "RISCVRegisterInfo.h" 19 #include "RISCVSubtarget.h" 20 #include "RISCVTargetMachine.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/MemoryLocation.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineJumpTableInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 30 #include "llvm/CodeGen/ValueTypes.h" 31 #include "llvm/IR/DiagnosticInfo.h" 32 #include "llvm/IR/DiagnosticPrinter.h" 33 #include "llvm/IR/IRBuilder.h" 34 #include "llvm/IR/IntrinsicsRISCV.h" 35 #include "llvm/IR/PatternMatch.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/ErrorHandling.h" 39 #include "llvm/Support/KnownBits.h" 40 #include "llvm/Support/MathExtras.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <optional> 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "riscv-lower" 47 48 STATISTIC(NumTailCalls, "Number of tail calls"); 49 50 static cl::opt<unsigned> ExtensionMaxWebSize( 51 DEBUG_TYPE "-ext-max-web-size", cl::Hidden, 52 cl::desc("Give the maximum size (in number of nodes) of the web of " 53 "instructions that we will consider for VW expansion"), 54 cl::init(18)); 55 56 static cl::opt<bool> 57 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, 58 cl::desc("Allow the formation of VW_W operations (e.g., " 59 "VWADD_W) with splat constants"), 60 cl::init(false)); 61 62 static cl::opt<unsigned> NumRepeatedDivisors( 63 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, 64 cl::desc("Set the minimum number of repetitions of a divisor to allow " 65 "transformation to multiplications by the reciprocal"), 66 cl::init(2)); 67 68 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 69 const RISCVSubtarget &STI) 70 : TargetLowering(TM), Subtarget(STI) { 71 72 if (Subtarget.isRV32E()) 73 report_fatal_error("Codegen not yet implemented for RV32E"); 74 75 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 76 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 77 78 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 79 !Subtarget.hasStdExtF()) { 80 errs() << "Hard-float 'f' ABI can't be used for a target that " 81 "doesn't support the F instruction set extension (ignoring " 82 "target-abi)\n"; 83 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 84 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 85 !Subtarget.hasStdExtD()) { 86 errs() << "Hard-float 'd' ABI can't be used for a target that " 87 "doesn't support the D instruction set extension (ignoring " 88 "target-abi)\n"; 89 ABI = Subtarget.is64Bit() ? 
RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 90 } 91 92 switch (ABI) { 93 default: 94 report_fatal_error("Don't know how to lower this ABI"); 95 case RISCVABI::ABI_ILP32: 96 case RISCVABI::ABI_ILP32F: 97 case RISCVABI::ABI_ILP32D: 98 case RISCVABI::ABI_LP64: 99 case RISCVABI::ABI_LP64F: 100 case RISCVABI::ABI_LP64D: 101 break; 102 } 103 104 MVT XLenVT = Subtarget.getXLenVT(); 105 106 // Set up the register classes. 107 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 108 109 if (Subtarget.hasStdExtZfhOrZfhmin()) 110 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 111 if (Subtarget.hasStdExtF()) 112 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 113 if (Subtarget.hasStdExtD()) 114 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 115 116 static const MVT::SimpleValueType BoolVecVTs[] = { 117 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, 118 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; 119 static const MVT::SimpleValueType IntVecVTs[] = { 120 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, 121 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, 122 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, 123 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, 124 MVT::nxv4i64, MVT::nxv8i64}; 125 static const MVT::SimpleValueType F16VecVTs[] = { 126 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, 127 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; 128 static const MVT::SimpleValueType F32VecVTs[] = { 129 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; 130 static const MVT::SimpleValueType F64VecVTs[] = { 131 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; 132 133 if (Subtarget.hasVInstructions()) { 134 auto addRegClassForRVV = [this](MVT VT) { 135 // Disable the smallest fractional LMUL types if ELEN is less than 136 // RVVBitsPerBlock. 
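      // For example, with an ELEN of 32 (Zve32x/Zve32f), MinElts below becomes
      // 64 / 32 = 2, so single-element types such as nxv1i8 are simply not
      // given a register class and stay illegal.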
137 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN(); 138 if (VT.getVectorMinNumElements() < MinElts) 139 return; 140 141 unsigned Size = VT.getSizeInBits().getKnownMinValue(); 142 const TargetRegisterClass *RC; 143 if (Size <= RISCV::RVVBitsPerBlock) 144 RC = &RISCV::VRRegClass; 145 else if (Size == 2 * RISCV::RVVBitsPerBlock) 146 RC = &RISCV::VRM2RegClass; 147 else if (Size == 4 * RISCV::RVVBitsPerBlock) 148 RC = &RISCV::VRM4RegClass; 149 else if (Size == 8 * RISCV::RVVBitsPerBlock) 150 RC = &RISCV::VRM8RegClass; 151 else 152 llvm_unreachable("Unexpected size"); 153 154 addRegisterClass(VT, RC); 155 }; 156 157 for (MVT VT : BoolVecVTs) 158 addRegClassForRVV(VT); 159 for (MVT VT : IntVecVTs) { 160 if (VT.getVectorElementType() == MVT::i64 && 161 !Subtarget.hasVInstructionsI64()) 162 continue; 163 addRegClassForRVV(VT); 164 } 165 166 if (Subtarget.hasVInstructionsF16()) 167 for (MVT VT : F16VecVTs) 168 addRegClassForRVV(VT); 169 170 if (Subtarget.hasVInstructionsF32()) 171 for (MVT VT : F32VecVTs) 172 addRegClassForRVV(VT); 173 174 if (Subtarget.hasVInstructionsF64()) 175 for (MVT VT : F64VecVTs) 176 addRegClassForRVV(VT); 177 178 if (Subtarget.useRVVForFixedLengthVectors()) { 179 auto addRegClassForFixedVectors = [this](MVT VT) { 180 MVT ContainerVT = getContainerForFixedLengthVector(VT); 181 unsigned RCID = getRegClassIDForVecVT(ContainerVT); 182 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 183 addRegisterClass(VT, TRI.getRegClass(RCID)); 184 }; 185 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) 186 if (useRVVForFixedLengthVectorVT(VT)) 187 addRegClassForFixedVectors(VT); 188 189 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) 190 if (useRVVForFixedLengthVectorVT(VT)) 191 addRegClassForFixedVectors(VT); 192 } 193 } 194 195 // Compute derived properties from the register classes. 196 computeRegisterProperties(STI.getRegisterInfo()); 197 198 setStackPointerRegisterToSaveRestore(RISCV::X2); 199 200 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT, 201 MVT::i1, Promote); 202 // DAGCombiner can call isLoadExtLegal for types that aren't legal. 203 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, 204 MVT::i1, Promote); 205 206 // TODO: add all necessary setOperationAction calls. 
207 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 208 209 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 210 setOperationAction(ISD::BR_CC, XLenVT, Expand); 211 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 212 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 213 214 setCondCodeAction(ISD::SETLE, XLenVT, Expand); 215 setCondCodeAction(ISD::SETGT, XLenVT, Custom); 216 setCondCodeAction(ISD::SETGE, XLenVT, Expand); 217 setCondCodeAction(ISD::SETULE, XLenVT, Expand); 218 setCondCodeAction(ISD::SETUGT, XLenVT, Custom); 219 setCondCodeAction(ISD::SETUGE, XLenVT, Expand); 220 221 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); 222 223 setOperationAction(ISD::VASTART, MVT::Other, Custom); 224 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); 225 226 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 227 228 setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); 229 230 if (!Subtarget.hasStdExtZbb()) 231 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); 232 233 if (Subtarget.is64Bit()) { 234 setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); 235 236 setOperationAction(ISD::LOAD, MVT::i32, Custom); 237 238 setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL}, 239 MVT::i32, Custom); 240 241 setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT}, 242 MVT::i32, Custom); 243 } else { 244 setLibcallName( 245 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128}, 246 nullptr); 247 setLibcallName(RTLIB::MULO_I64, nullptr); 248 } 249 250 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) { 251 setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); 252 } else { 253 if (Subtarget.is64Bit()) { 254 setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom); 255 } else { 256 setOperationAction(ISD::MUL, MVT::i64, Custom); 257 } 258 } 259 260 if (!Subtarget.hasStdExtM()) { 261 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, 262 XLenVT, Expand); 263 } else { 264 if (Subtarget.is64Bit()) { 265 setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, 266 {MVT::i8, MVT::i16, MVT::i32}, Custom); 267 } 268 } 269 270 setOperationAction( 271 {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, 272 Expand); 273 274 setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT, 275 Custom); 276 277 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) { 278 if (Subtarget.is64Bit()) 279 setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom); 280 } else { 281 setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand); 282 } 283 284 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 285 // pattern match it directly in isel. 286 setOperationAction(ISD::BSWAP, XLenVT, 287 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) 288 ? Legal 289 : Expand); 290 // Zbkb can use rev8+brev8 to implement bitreverse. 291 setOperationAction(ISD::BITREVERSE, XLenVT, 292 Subtarget.hasStdExtZbkb() ? 
Custom : Expand); 293 294 if (Subtarget.hasStdExtZbb()) { 295 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT, 296 Legal); 297 298 if (Subtarget.is64Bit()) 299 setOperationAction( 300 {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, 301 MVT::i32, Custom); 302 } else { 303 setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand); 304 } 305 306 if (Subtarget.is64Bit()) 307 setOperationAction(ISD::ABS, MVT::i32, Custom); 308 309 if (!Subtarget.hasVendorXVentanaCondOps()) 310 setOperationAction(ISD::SELECT, XLenVT, Custom); 311 312 static const unsigned FPLegalNodeTypes[] = { 313 ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, 314 ISD::LLRINT, ISD::LROUND, ISD::LLROUND, 315 ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, 316 ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD, 317 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 318 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS}; 319 320 static const ISD::CondCode FPCCToExpand[] = { 321 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 322 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 323 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 324 325 static const unsigned FPOpToExpand[] = { 326 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, 327 ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16}; 328 329 static const unsigned FPRndMode[] = { 330 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 331 ISD::FROUNDEVEN}; 332 333 if (Subtarget.hasStdExtZfhOrZfhmin()) 334 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 335 336 if (Subtarget.hasStdExtZfhOrZfhmin()) { 337 if (Subtarget.hasStdExtZfh()) { 338 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); 339 setOperationAction(FPRndMode, MVT::f16, Custom); 340 setOperationAction(ISD::SELECT, MVT::f16, Custom); 341 } else { 342 static const unsigned ZfhminPromoteOps[] = { 343 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, 344 ISD::FSUB, ISD::FMUL, ISD::FMA, 345 ISD::FDIV, ISD::FSQRT, ISD::FABS, 346 ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD, 347 ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, 348 ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, 349 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR, 350 ISD::FTRUNC, ISD::FRINT, ISD::FROUND, 351 ISD::FROUNDEVEN, ISD::SELECT}; 352 353 setOperationAction(ZfhminPromoteOps, MVT::f16, Promote); 354 setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT, 355 ISD::STRICT_LROUND, ISD::STRICT_LLROUND}, 356 MVT::f16, Legal); 357 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the 358 // DAGCombiner::visitFP_ROUND probably needs improvements first. 359 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); 360 } 361 362 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); 363 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); 364 setCondCodeAction(FPCCToExpand, MVT::f16, Expand); 365 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 366 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 367 368 setOperationAction({ISD::FREM, ISD::FNEARBYINT, ISD::FPOW, ISD::FPOWI, 369 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP, 370 ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10}, 371 MVT::f16, Promote); 372 373 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have 374 // complete support for all operations in LegalizeDAG. 
375 setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, 376 ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT, 377 ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, 378 ISD::STRICT_FTRUNC}, 379 MVT::f16, Promote); 380 381 // We need to custom promote this. 382 if (Subtarget.is64Bit()) 383 setOperationAction(ISD::FPOWI, MVT::i32, Custom); 384 } 385 386 if (Subtarget.hasStdExtF()) { 387 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); 388 setOperationAction(FPRndMode, MVT::f32, Custom); 389 setCondCodeAction(FPCCToExpand, MVT::f32, Expand); 390 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 391 setOperationAction(ISD::SELECT, MVT::f32, Custom); 392 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 393 setOperationAction(FPOpToExpand, MVT::f32, Expand); 394 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 395 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 396 } 397 398 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 399 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 400 401 if (Subtarget.hasStdExtD()) { 402 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); 403 if (Subtarget.is64Bit()) { 404 setOperationAction(FPRndMode, MVT::f64, Custom); 405 } 406 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); 407 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); 408 setCondCodeAction(FPCCToExpand, MVT::f64, Expand); 409 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 410 setOperationAction(ISD::SELECT, MVT::f64, Custom); 411 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 412 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 413 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 414 setOperationAction(FPOpToExpand, MVT::f64, Expand); 415 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 416 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 417 } 418 419 if (Subtarget.is64Bit()) 420 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT, 421 ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, 422 MVT::i32, Custom); 423 424 if (Subtarget.hasStdExtF()) { 425 setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT, 426 Custom); 427 428 setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT, 429 ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, 430 XLenVT, Legal); 431 432 setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom); 433 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); 434 } 435 436 setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, 437 ISD::JumpTable}, 438 XLenVT, Custom); 439 440 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 441 442 if (Subtarget.is64Bit()) 443 setOperationAction(ISD::Constant, MVT::i64, Custom); 444 445 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 446 // Unfortunately this can't be determined just from the ISA naming string. 447 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 448 Subtarget.is64Bit() ? 
Legal : Custom); 449 450 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); 451 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 452 if (Subtarget.is64Bit()) 453 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 454 455 if (Subtarget.hasStdExtA()) { 456 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 457 setMinCmpXchgSizeInBits(32); 458 } else if (Subtarget.hasForcedAtomics()) { 459 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 460 } else { 461 setMaxAtomicSizeInBitsSupported(0); 462 } 463 464 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 465 466 setBooleanContents(ZeroOrOneBooleanContent); 467 468 if (Subtarget.hasVInstructions()) { 469 setBooleanVectorContents(ZeroOrOneBooleanContent); 470 471 setOperationAction(ISD::VSCALE, XLenVT, Custom); 472 473 // RVV intrinsics may have illegal operands. 474 // We also need to custom legalize vmv.x.s. 475 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, 476 {MVT::i8, MVT::i16}, Custom); 477 if (Subtarget.is64Bit()) 478 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 479 else 480 setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN}, 481 MVT::i64, Custom); 482 483 setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}, 484 MVT::Other, Custom); 485 486 static const unsigned IntegerVPOps[] = { 487 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, 488 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, 489 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, 490 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, 491 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 492 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, 493 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, 494 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT, 495 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, 496 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN, 497 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, 498 ISD::VP_ABS}; 499 500 static const unsigned FloatingPointVPOps[] = { 501 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 502 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, 503 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 504 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, 505 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP, 506 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND, 507 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, 508 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, 509 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, 510 ISD::VP_FRINT, ISD::VP_FNEARBYINT}; 511 512 static const unsigned IntegerVecReduceOps[] = { 513 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, 514 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, 515 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; 516 517 static const unsigned FloatingPointVecReduceOps[] = { 518 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, 519 ISD::VECREDUCE_FMAX}; 520 521 if (!Subtarget.is64Bit()) { 522 // We must custom-lower certain vXi64 operations on RV32 due to the vector 523 // element type being illegal. 
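    // An i64 extract on RV32, for instance, is emitted roughly as a slide of
    // the element down to position 0, a vmv.x.s for the low 32 bits, and a
    // 32-bit vector shift plus a second vmv.x.s for the high 32 bits.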
524 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 525 MVT::i64, Custom); 526 527 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); 528 529 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 530 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, 531 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, 532 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}, 533 MVT::i64, Custom); 534 } 535 536 for (MVT VT : BoolVecVTs) { 537 if (!isTypeLegal(VT)) 538 continue; 539 540 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 541 542 // Mask VTs are custom-expanded into a series of standard nodes 543 setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS, 544 ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, 545 VT, Custom); 546 547 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 548 Custom); 549 550 setOperationAction(ISD::SELECT, VT, Custom); 551 setOperationAction( 552 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT, 553 Expand); 554 555 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom); 556 557 setOperationAction( 558 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, 559 Custom); 560 561 setOperationAction( 562 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, 563 Custom); 564 565 // RVV has native int->float & float->int conversions where the 566 // element type sizes are within one power-of-two of each other. Any 567 // wider distances between type sizes have to be lowered as sequences 568 // which progressively narrow the gap in stages. 569 setOperationAction( 570 {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, 571 VT, Custom); 572 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 573 Custom); 574 575 // Expand all extending loads to types larger than this, and truncating 576 // stores from types larger than this. 577 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 578 setTruncStoreAction(OtherVT, VT, Expand); 579 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT, 580 VT, Expand); 581 } 582 583 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, 584 ISD::VP_TRUNCATE, ISD::VP_SETCC}, 585 VT, Custom); 586 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); 587 588 setOperationPromotedToType( 589 ISD::VECTOR_SPLICE, VT, 590 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount())); 591 } 592 593 for (MVT VT : IntVecVTs) { 594 if (!isTypeLegal(VT)) 595 continue; 596 597 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 598 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 599 600 // Vectors implement MULHS/MULHU. 601 setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); 602 603 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*. 604 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) 605 setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand); 606 607 setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, 608 Legal); 609 610 setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand); 611 612 setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, VT, Expand); 613 614 setOperationAction(ISD::BSWAP, VT, Expand); 615 setOperationAction({ISD::VP_BSWAP, ISD::VP_BITREVERSE}, VT, Expand); 616 setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand); 617 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ, 618 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP}, 619 VT, Expand); 620 621 // Custom-lower extensions and truncations from/to mask types. 
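      // An extension from a mask is lowered, roughly, as a vmerge selecting
      // between a splat of the extended true value (-1 for sign_extend, 1 for
      // zero_extend) and a splat of zero; a truncate to a mask goes the other
      // way via an AND with 1 followed by a compare against zero.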
622 setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, 623 VT, Custom); 624 625 // RVV has native int->float & float->int conversions where the 626 // element type sizes are within one power-of-two of each other. Any 627 // wider distances between type sizes have to be lowered as sequences 628 // which progressively narrow the gap in stages. 629 setOperationAction( 630 {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, 631 VT, Custom); 632 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 633 Custom); 634 635 setOperationAction( 636 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); 637 638 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" 639 // nodes which truncate by one power of two at a time. 640 setOperationAction(ISD::TRUNCATE, VT, Custom); 641 642 // Custom-lower insert/extract operations to simplify patterns. 643 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 644 Custom); 645 646 // Custom-lower reduction operations to set up the corresponding custom 647 // nodes' operands. 648 setOperationAction(IntegerVecReduceOps, VT, Custom); 649 650 setOperationAction(IntegerVPOps, VT, Custom); 651 652 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 653 654 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, 655 VT, Custom); 656 657 setOperationAction( 658 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 659 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, 660 VT, Custom); 661 662 setOperationAction( 663 {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, 664 VT, Custom); 665 666 setOperationAction(ISD::SELECT, VT, Custom); 667 setOperationAction(ISD::SELECT_CC, VT, Expand); 668 669 setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom); 670 671 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 672 setTruncStoreAction(VT, OtherVT, Expand); 673 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT, 674 VT, Expand); 675 } 676 677 // Splice 678 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); 679 680 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the range 681 // of f32. 682 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 683 if (isTypeLegal(FloatVT)) { 684 setOperationAction( 685 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, 686 Custom); 687 } 688 } 689 690 // Expand various CCs to best match the RVV ISA, which natively supports UNE 691 // but no other unordered comparisons, and supports all ordered comparisons 692 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 693 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 694 // and we pattern-match those back to the "original", swapping operands once 695 // more. This way we catch both operations and both "vf" and "fv" forms with 696 // fewer patterns. 697 static const ISD::CondCode VFPCCToExpand[] = { 698 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 699 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 700 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 701 }; 702 703 // Sets common operation actions on RVV floating-point vector types. 704 const auto SetCommonVFPActions = [&](MVT VT) { 705 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 706 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 707 // sizes are within one power-of-two of each other. 
Therefore conversions 708 // between vXf16 and vXf64 must be lowered as sequences which convert via 709 // vXf32. 710 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 711 // Custom-lower insert/extract operations to simplify patterns. 712 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT, 713 Custom); 714 // Expand various condition codes (explained above). 715 setCondCodeAction(VFPCCToExpand, VT, Expand); 716 717 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); 718 719 setOperationAction( 720 {ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN}, 721 VT, Custom); 722 723 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 724 725 // Expand FP operations that need libcalls. 726 setOperationAction(ISD::FREM, VT, Expand); 727 setOperationAction(ISD::FPOW, VT, Expand); 728 setOperationAction(ISD::FCOS, VT, Expand); 729 setOperationAction(ISD::FSIN, VT, Expand); 730 setOperationAction(ISD::FSINCOS, VT, Expand); 731 setOperationAction(ISD::FEXP, VT, Expand); 732 setOperationAction(ISD::FEXP2, VT, Expand); 733 setOperationAction(ISD::FLOG, VT, Expand); 734 setOperationAction(ISD::FLOG2, VT, Expand); 735 setOperationAction(ISD::FLOG10, VT, Expand); 736 setOperationAction(ISD::FRINT, VT, Expand); 737 setOperationAction(ISD::FNEARBYINT, VT, Expand); 738 739 setOperationAction(ISD::FCOPYSIGN, VT, Legal); 740 741 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 742 743 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, 744 VT, Custom); 745 746 setOperationAction( 747 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 748 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, 749 VT, Custom); 750 751 setOperationAction(ISD::SELECT, VT, Custom); 752 setOperationAction(ISD::SELECT_CC, VT, Expand); 753 754 setOperationAction( 755 {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, 756 VT, Custom); 757 758 setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom); 759 760 setOperationAction(FloatingPointVPOps, VT, Custom); 761 }; 762 763 // Sets common extload/truncstore actions on RVV floating-point vector 764 // types. 765 const auto SetCommonVFPExtLoadTruncStoreActions = 766 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) { 767 for (auto SmallVT : SmallerVTs) { 768 setTruncStoreAction(VT, SmallVT, Expand); 769 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand); 770 } 771 }; 772 773 if (Subtarget.hasVInstructionsF16()) { 774 for (MVT VT : F16VecVTs) { 775 if (!isTypeLegal(VT)) 776 continue; 777 SetCommonVFPActions(VT); 778 } 779 } 780 781 if (Subtarget.hasVInstructionsF32()) { 782 for (MVT VT : F32VecVTs) { 783 if (!isTypeLegal(VT)) 784 continue; 785 SetCommonVFPActions(VT); 786 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 787 } 788 } 789 790 if (Subtarget.hasVInstructionsF64()) { 791 for (MVT VT : F64VecVTs) { 792 if (!isTypeLegal(VT)) 793 continue; 794 SetCommonVFPActions(VT); 795 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 796 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); 797 } 798 } 799 800 if (Subtarget.useRVVForFixedLengthVectors()) { 801 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 802 if (!useRVVForFixedLengthVectorVT(VT)) 803 continue; 804 805 // By default everything must be expanded. 
806 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 807 setOperationAction(Op, VT, Expand); 808 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { 809 setTruncStoreAction(VT, OtherVT, Expand); 810 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, 811 OtherVT, VT, Expand); 812 } 813 814 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 815 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, 816 Custom); 817 818 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT, 819 Custom); 820 821 setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, 822 VT, Custom); 823 824 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); 825 826 setOperationAction(ISD::SETCC, VT, Custom); 827 828 setOperationAction(ISD::SELECT, VT, Custom); 829 830 setOperationAction(ISD::TRUNCATE, VT, Custom); 831 832 setOperationAction(ISD::BITCAST, VT, Custom); 833 834 setOperationAction( 835 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT, 836 Custom); 837 838 setOperationAction( 839 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT, 840 Custom); 841 842 setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, 843 ISD::FP_TO_UINT}, 844 VT, Custom); 845 setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, 846 Custom); 847 848 // Operations below are different for between masks and other vectors. 849 if (VT.getVectorElementType() == MVT::i1) { 850 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND, 851 ISD::OR, ISD::XOR}, 852 VT, Custom); 853 854 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, 855 ISD::VP_SETCC, ISD::VP_TRUNCATE}, 856 VT, Custom); 857 continue; 858 } 859 860 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to 861 // it before type legalization for i64 vectors on RV32. It will then be 862 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle. 863 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs 864 // improvements first. 865 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { 866 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 867 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 868 } 869 870 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 871 872 setOperationAction( 873 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom); 874 875 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, 876 ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 877 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, 878 ISD::VP_SCATTER}, 879 VT, Custom); 880 881 setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR, 882 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV, 883 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL}, 884 VT, Custom); 885 886 setOperationAction( 887 {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom); 888 889 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*. 890 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) 891 setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); 892 893 setOperationAction( 894 {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, 895 Custom); 896 897 setOperationAction(ISD::VSELECT, VT, Custom); 898 setOperationAction(ISD::SELECT_CC, VT, Expand); 899 900 setOperationAction( 901 {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom); 902 903 // Custom-lower reduction operations to set up the corresponding custom 904 // nodes' operands. 
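        // A vecreduce_add, for instance, ends up as a vredsum.vs whose scalar
        // operand holds the neutral value (zero) in element 0 of a vector
        // register, with the result read back out through vmv.x.s.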
905 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX, 906 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX, 907 ISD::VECREDUCE_UMIN}, 908 VT, Custom); 909 910 setOperationAction(IntegerVPOps, VT, Custom); 911 912 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the 913 // range of f32. 914 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 915 if (isTypeLegal(FloatVT)) 916 setOperationAction( 917 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, 918 Custom); 919 } 920 921 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { 922 if (!useRVVForFixedLengthVectorVT(VT)) 923 continue; 924 925 // By default everything must be expanded. 926 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 927 setOperationAction(Op, VT, Expand); 928 for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) { 929 setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); 930 setTruncStoreAction(VT, OtherVT, Expand); 931 } 932 933 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 934 setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT, 935 Custom); 936 937 setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, 938 ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT, 939 ISD::EXTRACT_VECTOR_ELT}, 940 VT, Custom); 941 942 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, 943 ISD::MGATHER, ISD::MSCATTER}, 944 VT, Custom); 945 946 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, 947 ISD::EXPERIMENTAL_VP_STRIDED_LOAD, 948 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, 949 ISD::VP_SCATTER}, 950 VT, Custom); 951 952 setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, 953 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, 954 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM}, 955 VT, Custom); 956 957 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); 958 959 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, 960 ISD::FROUNDEVEN}, 961 VT, Custom); 962 963 setCondCodeAction(VFPCCToExpand, VT, Expand); 964 965 setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); 966 setOperationAction(ISD::SELECT_CC, VT, Expand); 967 968 setOperationAction(ISD::BITCAST, VT, Custom); 969 970 setOperationAction(FloatingPointVecReduceOps, VT, Custom); 971 972 setOperationAction(FloatingPointVPOps, VT, Custom); 973 } 974 975 // Custom-legalize bitcasts from fixed-length vectors to scalar types. 976 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64}, 977 Custom); 978 if (Subtarget.hasStdExtZfhOrZfhmin()) 979 setOperationAction(ISD::BITCAST, MVT::f16, Custom); 980 if (Subtarget.hasStdExtF()) 981 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 982 if (Subtarget.hasStdExtD()) 983 setOperationAction(ISD::BITCAST, MVT::f64, Custom); 984 } 985 } 986 987 if (Subtarget.hasForcedAtomics()) { 988 // Set atomic rmw/cas operations to expand to force __sync libcalls. 989 setOperationAction( 990 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD, 991 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR, 992 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN, 993 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX}, 994 XLenVT, Expand); 995 } 996 997 // Function alignments. 998 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 
2 : 4); 999 setMinFunctionAlignment(FunctionAlignment); 1000 setPrefFunctionAlignment(FunctionAlignment); 1001 1002 setMinimumJumpTableEntries(5); 1003 1004 // Jumps are expensive, compared to logic 1005 setJumpIsExpensive(); 1006 1007 setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, 1008 ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); 1009 if (Subtarget.is64Bit()) 1010 setTargetDAGCombine(ISD::SRA); 1011 1012 if (Subtarget.hasStdExtF()) 1013 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM}); 1014 1015 if (Subtarget.hasStdExtZbb()) 1016 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}); 1017 1018 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) 1019 setTargetDAGCombine(ISD::TRUNCATE); 1020 1021 if (Subtarget.hasStdExtZbkb()) 1022 setTargetDAGCombine(ISD::BITREVERSE); 1023 if (Subtarget.hasStdExtZfhOrZfhmin()) 1024 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); 1025 if (Subtarget.hasStdExtF()) 1026 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT, 1027 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}); 1028 if (Subtarget.hasVInstructions()) 1029 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER, 1030 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL, 1031 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR}); 1032 if (Subtarget.useRVVForFixedLengthVectors()) 1033 setTargetDAGCombine(ISD::BITCAST); 1034 1035 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); 1036 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); 1037 } 1038 1039 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, 1040 LLVMContext &Context, 1041 EVT VT) const { 1042 if (!VT.isVector()) 1043 return getPointerTy(DL); 1044 if (Subtarget.hasVInstructions() && 1045 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) 1046 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); 1047 return VT.changeVectorElementTypeToInteger(); 1048 } 1049 1050 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { 1051 return Subtarget.getXLenVT(); 1052 } 1053 1054 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 1055 const CallInst &I, 1056 MachineFunction &MF, 1057 unsigned Intrinsic) const { 1058 auto &DL = I.getModule()->getDataLayout(); 1059 switch (Intrinsic) { 1060 default: 1061 return false; 1062 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 1063 case Intrinsic::riscv_masked_atomicrmw_add_i32: 1064 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 1065 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 1066 case Intrinsic::riscv_masked_atomicrmw_max_i32: 1067 case Intrinsic::riscv_masked_atomicrmw_min_i32: 1068 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 1069 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 1070 case Intrinsic::riscv_masked_cmpxchg_i32: 1071 Info.opc = ISD::INTRINSIC_W_CHAIN; 1072 Info.memVT = MVT::i32; 1073 Info.ptrVal = I.getArgOperand(0); 1074 Info.offset = 0; 1075 Info.align = Align(4); 1076 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 1077 MachineMemOperand::MOVolatile; 1078 return true; 1079 case Intrinsic::riscv_masked_strided_load: 1080 Info.opc = ISD::INTRINSIC_W_CHAIN; 1081 Info.ptrVal = I.getArgOperand(1); 1082 Info.memVT = getValueType(DL, I.getType()->getScalarType()); 1083 Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8); 1084 Info.size = MemoryLocation::UnknownSize; 1085 Info.flags |= MachineMemOperand::MOLoad; 1086 return true; 1087 case Intrinsic::riscv_masked_strided_store: 1088 Info.opc = 
ISD::INTRINSIC_VOID; 1089 Info.ptrVal = I.getArgOperand(1); 1090 Info.memVT = 1091 getValueType(DL, I.getArgOperand(0)->getType()->getScalarType()); 1092 Info.align = Align( 1093 DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) / 1094 8); 1095 Info.size = MemoryLocation::UnknownSize; 1096 Info.flags |= MachineMemOperand::MOStore; 1097 return true; 1098 case Intrinsic::riscv_seg2_load: 1099 case Intrinsic::riscv_seg3_load: 1100 case Intrinsic::riscv_seg4_load: 1101 case Intrinsic::riscv_seg5_load: 1102 case Intrinsic::riscv_seg6_load: 1103 case Intrinsic::riscv_seg7_load: 1104 case Intrinsic::riscv_seg8_load: 1105 Info.opc = ISD::INTRINSIC_W_CHAIN; 1106 Info.ptrVal = I.getArgOperand(0); 1107 Info.memVT = 1108 getValueType(DL, I.getType()->getStructElementType(0)->getScalarType()); 1109 Info.align = 1110 Align(DL.getTypeSizeInBits( 1111 I.getType()->getStructElementType(0)->getScalarType()) / 1112 8); 1113 Info.size = MemoryLocation::UnknownSize; 1114 Info.flags |= MachineMemOperand::MOLoad; 1115 return true; 1116 } 1117 } 1118 1119 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1120 const AddrMode &AM, Type *Ty, 1121 unsigned AS, 1122 Instruction *I) const { 1123 // No global is ever allowed as a base. 1124 if (AM.BaseGV) 1125 return false; 1126 1127 // RVV instructions only support register addressing. 1128 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty)) 1129 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs; 1130 1131 // Require a 12-bit signed offset. 1132 if (!isInt<12>(AM.BaseOffs)) 1133 return false; 1134 1135 switch (AM.Scale) { 1136 case 0: // "r+i" or just "i", depending on HasBaseReg. 1137 break; 1138 case 1: 1139 if (!AM.HasBaseReg) // allow "r+i". 1140 break; 1141 return false; // disallow "r+r" or "r+r+i". 1142 default: 1143 return false; 1144 } 1145 1146 return true; 1147 } 1148 1149 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 1150 return isInt<12>(Imm); 1151 } 1152 1153 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 1154 return isInt<12>(Imm); 1155 } 1156 1157 // On RV32, 64-bit integers are split into their high and low parts and held 1158 // in two different registers, so the trunc is free since the low register can 1159 // just be used. 1160 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of 1161 // isTruncateFree? 1162 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 1163 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 1164 return false; 1165 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 1166 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 1167 return (SrcBits == 64 && DestBits == 32); 1168 } 1169 1170 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 1171 // We consider i64->i32 free on RV64 since we have good selection of W 1172 // instructions that make promoting operations back to i64 free in many cases. 1173 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || 1174 !DstVT.isInteger()) 1175 return false; 1176 unsigned SrcBits = SrcVT.getSizeInBits(); 1177 unsigned DestBits = DstVT.getSizeInBits(); 1178 return (SrcBits == 64 && DestBits == 32); 1179 } 1180 1181 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 1182 // Zexts are free if they can be combined with a load. 1183 // Don't advertise i32->i64 zextload as being free for RV64. It interacts 1184 // poorly with type legalization of compares preferring sext. 
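  // Narrower zero-extending loads are different: lbu and lhu already produce
  // zero-extended values, so an i8 or i16 load feeding a zext is reported as
  // free below.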
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
    const Instruction &AndI) const {
  // We expect to be able to match a bit extraction instruction if the Zbs
  // extension is supported and the mask is a power of two. However, we
  // conservatively return false if the mask would fit in an ANDI instruction,
  // on the basis that it's possible the sinking+duplication of the AND in
  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
  if (!Subtarget.hasStdExtZbs())
    return false;
  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
  if (!Mask)
    return false;
  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
}

bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
  EVT VT = Y.getValueType();

  // FIXME: Support vectors once we have tests.
  if (VT.isVector())
    return false;

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);
}

bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
  auto *C = dyn_cast<ConstantSDNode>(Y);
  return C && C->getAPIntValue().ule(10);
}

bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
                                                               EVT VT) const {
  // Only enable for RVV.
  if (!VT.isVector() || !Subtarget.hasVInstructions())
    return false;

  if (VT.isFixedLengthVector() && !isTypeLegal(VT))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                            Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getIntegerBitWidth();
  if (BitSize > Subtarget.getXLen())
    return false;

  // Fast path, assume 32-bit immediates are cheap.
  int64_t Val = Imm.getSExtValue();
  if (isInt<32>(Val))
    return true;

  // A constant pool entry may be more aligned than the load we're trying to
  // replace. If we don't support unaligned scalar mem, prefer the constant
  // pool.
  // TODO: Can the caller pass down the alignment?
  if (!Subtarget.enableUnalignedScalarMem())
    return true;

  // Prefer to keep the load if it would require many instructions.
1281 // This uses the same threshold we use for constant pools but doesn't 1282 // check useConstantPoolForLargeInts. 1283 // TODO: Should we keep the load only when we're definitely going to emit a 1284 // constant pool? 1285 1286 RISCVMatInt::InstSeq Seq = 1287 RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits()); 1288 return Seq.size() <= Subtarget.getMaxBuildIntsCost(); 1289 } 1290 1291 bool RISCVTargetLowering:: 1292 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 1293 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 1294 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 1295 SelectionDAG &DAG) const { 1296 // One interesting pattern that we'd want to form is 'bit extract': 1297 // ((1 >> Y) & 1) ==/!= 0 1298 // But we also need to be careful not to try to reverse that fold. 1299 1300 // Is this '((1 >> Y) & 1)'? 1301 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne()) 1302 return false; // Keep the 'bit extract' pattern. 1303 1304 // Will this be '((1 >> Y) & 1)' after the transform? 1305 if (NewShiftOpcode == ISD::SRL && CC->isOne()) 1306 return true; // Do form the 'bit extract' pattern. 1307 1308 // If 'X' is a constant, and we transform, then we will immediately 1309 // try to undo the fold, thus causing endless combine loop. 1310 // So only do the transform if X is not a constant. This matches the default 1311 // implementation of this function. 1312 return !XC; 1313 } 1314 1315 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const { 1316 switch (Opcode) { 1317 case Instruction::Add: 1318 case Instruction::Sub: 1319 case Instruction::Mul: 1320 case Instruction::And: 1321 case Instruction::Or: 1322 case Instruction::Xor: 1323 case Instruction::FAdd: 1324 case Instruction::FSub: 1325 case Instruction::FMul: 1326 case Instruction::FDiv: 1327 case Instruction::ICmp: 1328 case Instruction::FCmp: 1329 return true; 1330 case Instruction::Shl: 1331 case Instruction::LShr: 1332 case Instruction::AShr: 1333 case Instruction::UDiv: 1334 case Instruction::SDiv: 1335 case Instruction::URem: 1336 case Instruction::SRem: 1337 return Operand == 1; 1338 default: 1339 return false; 1340 } 1341 } 1342 1343 1344 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const { 1345 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 1346 return false; 1347 1348 if (canSplatOperand(I->getOpcode(), Operand)) 1349 return true; 1350 1351 auto *II = dyn_cast<IntrinsicInst>(I); 1352 if (!II) 1353 return false; 1354 1355 switch (II->getIntrinsicID()) { 1356 case Intrinsic::fma: 1357 case Intrinsic::vp_fma: 1358 return Operand == 0 || Operand == 1; 1359 case Intrinsic::vp_shl: 1360 case Intrinsic::vp_lshr: 1361 case Intrinsic::vp_ashr: 1362 case Intrinsic::vp_udiv: 1363 case Intrinsic::vp_sdiv: 1364 case Intrinsic::vp_urem: 1365 case Intrinsic::vp_srem: 1366 return Operand == 1; 1367 // These intrinsics are commutative. 1368 case Intrinsic::vp_add: 1369 case Intrinsic::vp_mul: 1370 case Intrinsic::vp_and: 1371 case Intrinsic::vp_or: 1372 case Intrinsic::vp_xor: 1373 case Intrinsic::vp_fadd: 1374 case Intrinsic::vp_fmul: 1375 // These intrinsics have 'vr' versions. 1376 case Intrinsic::vp_sub: 1377 case Intrinsic::vp_fsub: 1378 case Intrinsic::vp_fdiv: 1379 return Operand == 0 || Operand == 1; 1380 default: 1381 return false; 1382 } 1383 } 1384 1385 /// Check if sinking \p I's operands to I's basic block is profitable, because 1386 /// the operands can be folded into a target instruction, e.g. 
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  for (auto OpIdx : enumerate(I->operands())) {
    if (!canSplatOperand(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand.
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across
    // GPR and vector registers.
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!canSplatOperand(Insn, U.getOperandNo()))
        return false;
    }

    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}

bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}

bool RISCVTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfhOrZfhmin())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  return Imm.isZero();
}

// TODO: This is very conservative.
bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                  unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // For now, only support extracting a fixed-length vector from another
  // fixed-length vector.
  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
    return false;

  unsigned ResElts = ResVT.getVectorNumElements();
  unsigned SrcElts = SrcVT.getVectorNumElements();

  // Conservatively only handle extracting half of a vector.
  // TODO: Relax this.
  if ((ResElts * 2) != SrcElts)
    return false;

  // The smallest type we can slide is i8.
  // TODO: We can extract index 0 from a mask vector without a slide.
1482 if (ResVT.getVectorElementType() == MVT::i1) 1483 return false; 1484 1485 // Slide can support arbitrary index, but we only treat vslidedown.vi as 1486 // cheap. 1487 if (Index >= 32) 1488 return false; 1489 1490 // TODO: We can do arbitrary slidedowns, but for now only support extracting 1491 // the upper half of a vector until we have more test coverage. 1492 return Index == 0 || Index == ResElts; 1493 } 1494 1495 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 1496 return (VT == MVT::f16 && Subtarget.hasStdExtZfhOrZfhmin()) || 1497 (VT == MVT::f32 && Subtarget.hasStdExtF()) || 1498 (VT == MVT::f64 && Subtarget.hasStdExtD()); 1499 } 1500 1501 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, 1502 CallingConv::ID CC, 1503 EVT VT) const { 1504 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 1505 // We might still end up using a GPR but that will be decided based on ABI. 1506 if (VT == MVT::f16 && Subtarget.hasStdExtF() && 1507 !Subtarget.hasStdExtZfhOrZfhmin()) 1508 return MVT::f32; 1509 1510 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); 1511 } 1512 1513 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, 1514 CallingConv::ID CC, 1515 EVT VT) const { 1516 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled. 1517 // We might still end up using a GPR but that will be decided based on ABI. 1518 if (VT == MVT::f16 && Subtarget.hasStdExtF() && 1519 !Subtarget.hasStdExtZfhOrZfhmin()) 1520 return 1; 1521 1522 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); 1523 } 1524 1525 // Changes the condition code and swaps operands if necessary, so the SetCC 1526 // operation matches one of the comparisons supported directly by branches 1527 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare 1528 // with 1/-1. 1529 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, 1530 ISD::CondCode &CC, SelectionDAG &DAG) { 1531 // If this is a single bit test that can't be handled by ANDI, shift the 1532 // bit to be tested to the MSB and perform a signed compare with 0. 1533 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && 1534 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && 1535 isa<ConstantSDNode>(LHS.getOperand(1))) { 1536 uint64_t Mask = LHS.getConstantOperandVal(1); 1537 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) { 1538 unsigned ShAmt = 0; 1539 if (isPowerOf2_64(Mask)) { 1540 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 1541 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); 1542 } else { 1543 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask); 1544 } 1545 1546 LHS = LHS.getOperand(0); 1547 if (ShAmt != 0) 1548 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, 1549 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 1550 return; 1551 } 1552 } 1553 1554 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 1555 int64_t C = RHSC->getSExtValue(); 1556 switch (CC) { 1557 default: break; 1558 case ISD::SETGT: 1559 // Convert X > -1 to X >= 0. 1560 if (C == -1) { 1561 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 1562 CC = ISD::SETGE; 1563 return; 1564 } 1565 break; 1566 case ISD::SETLT: 1567 // Convert X < 1 to 0 <= X. 
1568 if (C == 1) { 1569 RHS = LHS; 1570 LHS = DAG.getConstant(0, DL, RHS.getValueType()); 1571 CC = ISD::SETGE; 1572 return; 1573 } 1574 break; 1575 } 1576 } 1577 1578 switch (CC) { 1579 default: 1580 break; 1581 case ISD::SETGT: 1582 case ISD::SETLE: 1583 case ISD::SETUGT: 1584 case ISD::SETULE: 1585 CC = ISD::getSetCCSwappedOperands(CC); 1586 std::swap(LHS, RHS); 1587 break; 1588 } 1589 } 1590 1591 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { 1592 assert(VT.isScalableVector() && "Expecting a scalable vector type"); 1593 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); 1594 if (VT.getVectorElementType() == MVT::i1) 1595 KnownSize *= 8; 1596 1597 switch (KnownSize) { 1598 default: 1599 llvm_unreachable("Invalid LMUL."); 1600 case 8: 1601 return RISCVII::VLMUL::LMUL_F8; 1602 case 16: 1603 return RISCVII::VLMUL::LMUL_F4; 1604 case 32: 1605 return RISCVII::VLMUL::LMUL_F2; 1606 case 64: 1607 return RISCVII::VLMUL::LMUL_1; 1608 case 128: 1609 return RISCVII::VLMUL::LMUL_2; 1610 case 256: 1611 return RISCVII::VLMUL::LMUL_4; 1612 case 512: 1613 return RISCVII::VLMUL::LMUL_8; 1614 } 1615 } 1616 1617 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { 1618 switch (LMul) { 1619 default: 1620 llvm_unreachable("Invalid LMUL."); 1621 case RISCVII::VLMUL::LMUL_F8: 1622 case RISCVII::VLMUL::LMUL_F4: 1623 case RISCVII::VLMUL::LMUL_F2: 1624 case RISCVII::VLMUL::LMUL_1: 1625 return RISCV::VRRegClassID; 1626 case RISCVII::VLMUL::LMUL_2: 1627 return RISCV::VRM2RegClassID; 1628 case RISCVII::VLMUL::LMUL_4: 1629 return RISCV::VRM4RegClassID; 1630 case RISCVII::VLMUL::LMUL_8: 1631 return RISCV::VRM8RegClassID; 1632 } 1633 } 1634 1635 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { 1636 RISCVII::VLMUL LMUL = getLMUL(VT); 1637 if (LMUL == RISCVII::VLMUL::LMUL_F8 || 1638 LMUL == RISCVII::VLMUL::LMUL_F4 || 1639 LMUL == RISCVII::VLMUL::LMUL_F2 || 1640 LMUL == RISCVII::VLMUL::LMUL_1) { 1641 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 1642 "Unexpected subreg numbering"); 1643 return RISCV::sub_vrm1_0 + Index; 1644 } 1645 if (LMUL == RISCVII::VLMUL::LMUL_2) { 1646 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 1647 "Unexpected subreg numbering"); 1648 return RISCV::sub_vrm2_0 + Index; 1649 } 1650 if (LMUL == RISCVII::VLMUL::LMUL_4) { 1651 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 1652 "Unexpected subreg numbering"); 1653 return RISCV::sub_vrm4_0 + Index; 1654 } 1655 llvm_unreachable("Invalid vector type."); 1656 } 1657 1658 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { 1659 if (VT.getVectorElementType() == MVT::i1) 1660 return RISCV::VRRegClassID; 1661 return getRegClassIDForLMUL(getLMUL(VT)); 1662 } 1663 1664 // Attempt to decompose a subvector insert/extract between VecVT and 1665 // SubVecVT via subregister indices. Returns the subregister index that 1666 // can perform the subvector insert/extract with the given element index, as 1667 // well as the index corresponding to any leftover subvectors that must be 1668 // further inserted/extracted within the register class for SubVecVT. 
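// For example (assuming the usual subregister numbering), extracting nxv1i32
// at index 5 from nxv16i32 steps down VRM8 -> VRM4 -> VRM2 -> VR, composing
// sub_vrm4_0, sub_vrm2_1 and sub_vrm1_0, and returns a leftover element index
// of 1 to be handled within the resulting VR-sized nxv2i32.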
1669 std::pair<unsigned, unsigned> 1670 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1671 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, 1672 const RISCVRegisterInfo *TRI) { 1673 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && 1674 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && 1675 RISCV::VRM2RegClassID > RISCV::VRRegClassID), 1676 "Register classes not ordered"); 1677 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); 1678 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); 1679 // Try to compose a subregister index that takes us from the incoming 1680 // LMUL>1 register class down to the outgoing one. At each step we half 1681 // the LMUL: 1682 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 1683 // Note that this is not guaranteed to find a subregister index, such as 1684 // when we are extracting from one VR type to another. 1685 unsigned SubRegIdx = RISCV::NoSubRegister; 1686 for (const unsigned RCID : 1687 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) 1688 if (VecRegClassID > RCID && SubRegClassID <= RCID) { 1689 VecVT = VecVT.getHalfNumVectorElementsVT(); 1690 bool IsHi = 1691 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); 1692 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, 1693 getSubregIndexByMVT(VecVT, IsHi)); 1694 if (IsHi) 1695 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); 1696 } 1697 return {SubRegIdx, InsertExtractIdx}; 1698 } 1699 1700 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar 1701 // stores for those types. 1702 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { 1703 return !Subtarget.useRVVForFixedLengthVectors() || 1704 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); 1705 } 1706 1707 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const { 1708 if (ScalarTy->isPointerTy()) 1709 return true; 1710 1711 if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) || 1712 ScalarTy->isIntegerTy(32)) 1713 return true; 1714 1715 if (ScalarTy->isIntegerTy(64)) 1716 return Subtarget.hasVInstructionsI64(); 1717 1718 if (ScalarTy->isHalfTy()) 1719 return Subtarget.hasVInstructionsF16(); 1720 if (ScalarTy->isFloatTy()) 1721 return Subtarget.hasVInstructionsF32(); 1722 if (ScalarTy->isDoubleTy()) 1723 return Subtarget.hasVInstructionsF64(); 1724 1725 return false; 1726 } 1727 1728 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { 1729 return NumRepeatedDivisors; 1730 } 1731 1732 static SDValue getVLOperand(SDValue Op) { 1733 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1734 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 1735 "Unexpected opcode"); 1736 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 1737 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 1738 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 1739 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 1740 if (!II) 1741 return SDValue(); 1742 return Op.getOperand(II->VLOperand + 1 + HasChain); 1743 } 1744 1745 static bool useRVVForFixedLengthVectorVT(MVT VT, 1746 const RISCVSubtarget &Subtarget) { 1747 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); 1748 if (!Subtarget.useRVVForFixedLengthVectors()) 1749 return false; 1750 1751 // We only support a set of vector types with a consistent maximum fixed size 1752 // across all supported vector element types to avoid legalization issues. 
1753 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest 1754 // fixed-length vector type we support is 1024 bytes. 1755 if (VT.getFixedSizeInBits() > 1024 * 8) 1756 return false; 1757 1758 unsigned MinVLen = Subtarget.getRealMinVLen(); 1759 1760 MVT EltVT = VT.getVectorElementType(); 1761 1762 // Don't use RVV for vectors we cannot scalarize if required. 1763 switch (EltVT.SimpleTy) { 1764 // i1 is supported but has different rules. 1765 default: 1766 return false; 1767 case MVT::i1: 1768 // Masks can only use a single register. 1769 if (VT.getVectorNumElements() > MinVLen) 1770 return false; 1771 MinVLen /= 8; 1772 break; 1773 case MVT::i8: 1774 case MVT::i16: 1775 case MVT::i32: 1776 break; 1777 case MVT::i64: 1778 if (!Subtarget.hasVInstructionsI64()) 1779 return false; 1780 break; 1781 case MVT::f16: 1782 if (!Subtarget.hasVInstructionsF16()) 1783 return false; 1784 break; 1785 case MVT::f32: 1786 if (!Subtarget.hasVInstructionsF32()) 1787 return false; 1788 break; 1789 case MVT::f64: 1790 if (!Subtarget.hasVInstructionsF64()) 1791 return false; 1792 break; 1793 } 1794 1795 // Reject elements larger than ELEN. 1796 if (EltVT.getSizeInBits() > Subtarget.getELEN()) 1797 return false; 1798 1799 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); 1800 // Don't use RVV for types that don't fit. 1801 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 1802 return false; 1803 1804 // TODO: Perhaps an artificial restriction, but worth having whilst getting 1805 // the base fixed length RVV support in place. 1806 if (!VT.isPow2VectorType()) 1807 return false; 1808 1809 return true; 1810 } 1811 1812 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 1813 return ::useRVVForFixedLengthVectorVT(VT, Subtarget); 1814 } 1815 1816 // Return the largest legal scalable vector type that matches VT's element type. 1817 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, 1818 const RISCVSubtarget &Subtarget) { 1819 // This may be called before legal types are setup. 1820 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || 1821 useRVVForFixedLengthVectorVT(VT, Subtarget)) && 1822 "Expected legal fixed length vector!"); 1823 1824 unsigned MinVLen = Subtarget.getRealMinVLen(); 1825 unsigned MaxELen = Subtarget.getELEN(); 1826 1827 MVT EltVT = VT.getVectorElementType(); 1828 switch (EltVT.SimpleTy) { 1829 default: 1830 llvm_unreachable("unexpected element type for RVV container"); 1831 case MVT::i1: 1832 case MVT::i8: 1833 case MVT::i16: 1834 case MVT::i32: 1835 case MVT::i64: 1836 case MVT::f16: 1837 case MVT::f32: 1838 case MVT::f64: { 1839 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for 1840 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within 1841 // each fractional LMUL we support SEW between 8 and LMUL*ELEN. 
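    // For example, assuming MinVLen=128 and ELEN=64, v8i32 maps to nxv4i32
    // (LMUL=1) and v4i32 maps to nxv2i32 (LMUL=1/2); the std::max clamp below
    // keeps very small vectors at the smallest supported fractional LMUL.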
1842     unsigned NumElts =
1843         (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1844     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1845     assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1846     return MVT::getScalableVectorVT(EltVT, NumElts);
1847   }
1848   }
1849 }
1850
1851 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
1852                                             const RISCVSubtarget &Subtarget) {
1853   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
1854                                           Subtarget);
1855 }
1856
1857 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
1858   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
1859 }
1860
1861 // Grow V to consume an entire RVV register.
1862 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1863                                        const RISCVSubtarget &Subtarget) {
1864   assert(VT.isScalableVector() &&
1865          "Expected to convert into a scalable vector!");
1866   assert(V.getValueType().isFixedLengthVector() &&
1867          "Expected a fixed length vector operand!");
1868   SDLoc DL(V);
1869   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1870   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1871 }
1872
1873 // Shrink V so it's just big enough to maintain a VT's worth of data.
1874 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
1875                                          const RISCVSubtarget &Subtarget) {
1876   assert(VT.isFixedLengthVector() &&
1877          "Expected to convert into a fixed length vector!");
1878   assert(V.getValueType().isScalableVector() &&
1879          "Expected a scalable vector operand!");
1880   SDLoc DL(V);
1881   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1882   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1883 }
1884
1885 /// Return the mask type suitable for masking the provided
1886 /// vector type. This is simply an i1 element type vector of the same
1887 /// (possibly scalable) length.
1888 static MVT getMaskTypeFor(MVT VecVT) {
1889   assert(VecVT.isVector());
1890   ElementCount EC = VecVT.getVectorElementCount();
1891   return MVT::getVectorVT(MVT::i1, EC);
1892 }
1893
1894 /// Creates an all ones mask suitable for masking a vector of type VecVT with
1895 /// vector length VL.
1896 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, SDLoc DL,
1897                               SelectionDAG &DAG) {
1898   MVT MaskVT = getMaskTypeFor(VecVT);
1899   return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1900 }
1901
1902 static SDValue getVLOp(uint64_t NumElts, SDLoc DL, SelectionDAG &DAG,
1903                        const RISCVSubtarget &Subtarget) {
1904   return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
1905 }
1906
1907 static std::pair<SDValue, SDValue>
1908 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1909                 const RISCVSubtarget &Subtarget) {
1910   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1911   SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
1912   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1913   return {Mask, VL};
1914 }
1915
1916 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1917 // VecVT is a vector type, either fixed-length or scalable. If VecVT is
1918 // fixed-length, ContainerVT is the scalable container type it is lowered to;
1919 // otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
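// For a fixed-length VecVT the returned VL is the constant element count; for
// a scalable VecVT it is the X0 sentinel register, i.e. VLMAX.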
1920 static std::pair<SDValue, SDValue>
1921 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1922                 const RISCVSubtarget &Subtarget) {
1923   if (VecVT.isFixedLengthVector())
1924     return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
1925                            Subtarget);
1926   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1927   MVT XLenVT = Subtarget.getXLenVT();
1928   SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
1929   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1930   return {Mask, VL};
1931 }
1932
1933 // As above but assuming the given type is a scalable vector type.
1934 static std::pair<SDValue, SDValue>
1935 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
1936                         const RISCVSubtarget &Subtarget) {
1937   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1938   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1939 }
1940
1941 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
1942 // little of either is (currently) supported. This can get us into an infinite loop
1943 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1944 // as a ..., etc.
1945 // Until either (or both) of these can reliably lower any node, reporting that
1946 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1947 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1948 // which is not desirable.
1949 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
1950     EVT VT, unsigned DefinedValues) const {
1951   return false;
1952 }
1953
1954 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
1955                                   const RISCVSubtarget &Subtarget) {
1956   // RISCV FP-to-int conversions saturate to the destination register size, but
1957   // don't produce 0 for nan. We can use a conversion instruction and fix the
1958   // nan case with a compare and a select.
1959   SDValue Src = Op.getOperand(0);
1960
1961   MVT DstVT = Op.getSimpleValueType();
1962   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1963
1964   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1965
1966   if (!DstVT.isVector()) {
1967     // In absence of Zfh, promote f16 to f32, then saturate the result.
1968     if (Src.getSimpleValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) {
1969       Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
1970     }
1971
1972     unsigned Opc;
1973     if (SatVT == DstVT)
1974       Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1975     else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1976       Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1977     else
1978       return SDValue();
1979     // FIXME: Support other SatVTs by clamping before or after the conversion.
1980
1981     SDLoc DL(Op);
1982     SDValue FpToInt = DAG.getNode(
1983         Opc, DL, DstVT, Src,
1984         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1985
1986     if (Opc == RISCVISD::FCVT_WU_RV64)
1987       FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
1988
1989     SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1990     return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
1991                            ISD::CondCode::SETUO);
1992   }
1993
1994   // Vectors.
1995
1996   MVT DstEltVT = DstVT.getVectorElementType();
1997   MVT SrcVT = Src.getSimpleValueType();
1998   MVT SrcEltVT = SrcVT.getVectorElementType();
1999   unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2000   unsigned DstEltSize = DstEltVT.getSizeInBits();
2001
2002   // Only handle saturating to the destination type.
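  // (For example, a v4f32 -> v4i32 fptosi.sat with SatVT == i32 is handled,
  // but saturating to i16 while producing a v4i32 result is not.)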
2003   if (SatVT != DstEltVT)
2004     return SDValue();
2005
2006   // FIXME: Don't support narrowing by more than 1 step for now.
2007   if (SrcEltSize > (2 * DstEltSize))
2008     return SDValue();
2009
2010   MVT DstContainerVT = DstVT;
2011   MVT SrcContainerVT = SrcVT;
2012   if (DstVT.isFixedLengthVector()) {
2013     DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2014     SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2015     assert(DstContainerVT.getVectorElementCount() ==
2016            SrcContainerVT.getVectorElementCount() &&
2017            "Expected same element count");
2018     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2019   }
2020
2021   SDLoc DL(Op);
2022
2023   auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2024
2025   SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2026                               {Src, Src, DAG.getCondCode(ISD::SETNE),
2027                                DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2028
2029   // If we need to widen by more than 1 step, promote the FP type, then do a
2030   // widening convert.
2031   if (DstEltSize > (2 * SrcEltSize)) {
2032     assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2033     MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2034     Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2035   }
2036
2037   unsigned RVVOpc =
2038       IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2039   SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2040
2041   SDValue SplatZero = DAG.getNode(
2042       RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2043       DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2044   Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2045                     Res, VL);
2046
2047   if (DstVT.isFixedLengthVector())
2048     Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2049
2050   return Res;
2051 }
2052
2053 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2054   switch (Opc) {
2055   case ISD::FROUNDEVEN:
2056   case ISD::VP_FROUNDEVEN:
2057     return RISCVFPRndMode::RNE;
2058   case ISD::FTRUNC:
2059   case ISD::VP_FROUNDTOZERO:
2060     return RISCVFPRndMode::RTZ;
2061   case ISD::FFLOOR:
2062   case ISD::VP_FFLOOR:
2063     return RISCVFPRndMode::RDN;
2064   case ISD::FCEIL:
2065   case ISD::VP_FCEIL:
2066     return RISCVFPRndMode::RUP;
2067   case ISD::FROUND:
2068   case ISD::VP_FROUND:
2069     return RISCVFPRndMode::RMM;
2070   case ISD::FRINT:
2071     return RISCVFPRndMode::DYN;
2072   }
2073
2074   return RISCVFPRndMode::Invalid;
2075 }
2076
2077 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
2078 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2079 // the integer domain and back, taking care to avoid converting values that are
2080 // nan or already correct.
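// The emitted sequence is roughly: compare |x| against 2^(mantissa bits)
// (2^10 for f16, 2^23 for f32, 2^52 for f64); lanes below that bound and not
// nan are converted to integer and back using the requested rounding mode, and
// the original sign is restored with a copysign at the end.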
2081 static SDValue 2082 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2083 const RISCVSubtarget &Subtarget) { 2084 MVT VT = Op.getSimpleValueType(); 2085 assert(VT.isVector() && "Unexpected type"); 2086 2087 SDLoc DL(Op); 2088 2089 SDValue Src = Op.getOperand(0); 2090 2091 MVT ContainerVT = VT; 2092 if (VT.isFixedLengthVector()) { 2093 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2094 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2095 } 2096 2097 SDValue Mask, VL; 2098 if (Op->isVPOpcode()) { 2099 Mask = Op.getOperand(1); 2100 VL = Op.getOperand(2); 2101 } else { 2102 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2103 } 2104 2105 // Freeze the source since we are increasing the number of uses. 2106 Src = DAG.getFreeze(Src); 2107 2108 // We do the conversion on the absolute value and fix the sign at the end. 2109 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL); 2110 2111 // Determine the largest integer that can be represented exactly. This and 2112 // values larger than it don't have any fractional bits so don't need to 2113 // be converted. 2114 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT); 2115 unsigned Precision = APFloat::semanticsPrecision(FltSem); 2116 APFloat MaxVal = APFloat(FltSem); 2117 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 2118 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 2119 SDValue MaxValNode = 2120 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType()); 2121 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT, 2122 DAG.getUNDEF(ContainerVT), MaxValNode, VL); 2123 2124 // If abs(Src) was larger than MaxVal or nan, keep it. 2125 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 2126 Mask = 2127 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT, 2128 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), 2129 Mask, Mask, VL}); 2130 2131 // Truncate to integer and convert back to FP. 2132 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger(); 2133 MVT XLenVT = Subtarget.getXLenVT(); 2134 SDValue Truncated; 2135 2136 switch (Op.getOpcode()) { 2137 default: 2138 llvm_unreachable("Unexpected opcode"); 2139 case ISD::FCEIL: 2140 case ISD::VP_FCEIL: 2141 case ISD::FFLOOR: 2142 case ISD::VP_FFLOOR: 2143 case ISD::FROUND: 2144 case ISD::FROUNDEVEN: 2145 case ISD::VP_FROUND: 2146 case ISD::VP_FROUNDEVEN: 2147 case ISD::VP_FROUNDTOZERO: { 2148 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 2149 assert(FRM != RISCVFPRndMode::Invalid); 2150 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask, 2151 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 2152 break; 2153 } 2154 case ISD::FTRUNC: 2155 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, 2156 Mask, VL); 2157 break; 2158 case ISD::VP_FRINT: 2159 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL); 2160 break; 2161 case ISD::VP_FNEARBYINT: 2162 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src, 2163 Mask, VL); 2164 break; 2165 } 2166 2167 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL. 2168 if (Op.getOpcode() != ISD::VP_FNEARBYINT) 2169 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated, 2170 Mask, VL); 2171 2172 // Restore the original sign so that -0.0 is preserved. 
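  // (For example, trunc(-0.3) round-trips through integer 0 and comes back as
  // +0.0; the copysign below restores the expected -0.0.)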
2173 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated, 2174 Src, Src, Mask, VL); 2175 2176 if (!VT.isFixedLengthVector()) 2177 return Truncated; 2178 2179 return convertFromScalableVector(VT, Truncated, DAG, Subtarget); 2180 } 2181 2182 static SDValue 2183 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, 2184 const RISCVSubtarget &Subtarget) { 2185 MVT VT = Op.getSimpleValueType(); 2186 if (VT.isVector()) 2187 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 2188 2189 if (DAG.shouldOptForSize()) 2190 return SDValue(); 2191 2192 SDLoc DL(Op); 2193 SDValue Src = Op.getOperand(0); 2194 2195 // Create an integer the size of the mantissa with the MSB set. This and all 2196 // values larger than it don't have any fractional bits so don't need to be 2197 // converted. 2198 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 2199 unsigned Precision = APFloat::semanticsPrecision(FltSem); 2200 APFloat MaxVal = APFloat(FltSem); 2201 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 2202 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 2203 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); 2204 2205 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode()); 2206 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode, 2207 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT())); 2208 } 2209 2210 struct VIDSequence { 2211 int64_t StepNumerator; 2212 unsigned StepDenominator; 2213 int64_t Addend; 2214 }; 2215 2216 static std::optional<uint64_t> getExactInteger(const APFloat &APF, 2217 uint32_t BitWidth) { 2218 APSInt ValInt(BitWidth, !APF.isNegative()); 2219 // We use an arbitrary rounding mode here. If a floating-point is an exact 2220 // integer (e.g., 1.0), the rounding mode does not affect the output value. If 2221 // the rounding mode changes the output value, then it is not an exact 2222 // integer. 2223 RoundingMode ArbitraryRM = RoundingMode::TowardZero; 2224 bool IsExact; 2225 // If it is out of signed integer range, it will return an invalid operation. 2226 // If it is not an exact integer, IsExact is false. 2227 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) == 2228 APFloatBase::opInvalidOp) || 2229 !IsExact) 2230 return std::nullopt; 2231 return ValInt.extractBitsAsZExtValue(BitWidth, 0); 2232 } 2233 2234 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] 2235 // to the (non-zero) step S and start value X. This can be then lowered as the 2236 // RVV sequence (VID * S) + X, for example. 2237 // The step S is represented as an integer numerator divided by a positive 2238 // denominator. Note that the implementation currently only identifies 2239 // sequences in which either the numerator is +/- 1 or the denominator is 1. It 2240 // cannot detect 2/3, for example. 2241 // Note that this method will also match potentially unappealing index 2242 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to 2243 // determine whether this is worth generating code for. 
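// For example, <i32 1, i32 3, i32 5, i32 7> is matched with StepNumerator=2,
// StepDenominator=1, Addend=1, and <i32 0, i32 0, i32 1, i32 1> with
// StepNumerator=1, StepDenominator=2, Addend=0.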
2244 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2245   unsigned NumElts = Op.getNumOperands();
2246   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2247   bool IsInteger = Op.getValueType().isInteger();
2248
2249   std::optional<unsigned> SeqStepDenom;
2250   std::optional<int64_t> SeqStepNum, SeqAddend;
2251   std::optional<std::pair<uint64_t, unsigned>> PrevElt;
2252   unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2253   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2254     // Assume undef elements match the sequence; we just have to be careful
2255     // when interpolating across them.
2256     if (Op.getOperand(Idx).isUndef())
2257       continue;
2258
2259     uint64_t Val;
2260     if (IsInteger) {
2261       // The BUILD_VECTOR must be all constants.
2262       if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
2263         return std::nullopt;
2264       Val = Op.getConstantOperandVal(Idx) &
2265             maskTrailingOnes<uint64_t>(EltSizeInBits);
2266     } else {
2267       // The BUILD_VECTOR must be all constants.
2268       if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
2269         return std::nullopt;
2270       if (auto ExactInteger = getExactInteger(
2271               cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2272               EltSizeInBits))
2273         Val = *ExactInteger;
2274       else
2275         return std::nullopt;
2276     }
2277
2278     if (PrevElt) {
2279       // Calculate the step since the last non-undef element, and ensure
2280       // it's consistent across the entire sequence.
2281       unsigned IdxDiff = Idx - PrevElt->second;
2282       int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
2283
2284       // A zero value difference means that we're somewhere in the middle
2285       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
2286       // step change before evaluating the sequence.
2287       if (ValDiff == 0)
2288         continue;
2289
2290       int64_t Remainder = ValDiff % IdxDiff;
2291       // Normalize the step if it's greater than 1.
2292       if (Remainder != ValDiff) {
2293         // The difference must cleanly divide the element span.
2294         if (Remainder != 0)
2295           return std::nullopt;
2296         ValDiff /= IdxDiff;
2297         IdxDiff = 1;
2298       }
2299
2300       if (!SeqStepNum)
2301         SeqStepNum = ValDiff;
2302       else if (ValDiff != SeqStepNum)
2303         return std::nullopt;
2304
2305       if (!SeqStepDenom)
2306         SeqStepDenom = IdxDiff;
2307       else if (IdxDiff != *SeqStepDenom)
2308         return std::nullopt;
2309     }
2310
2311     // Record this non-undef element for later.
2312     if (!PrevElt || PrevElt->first != Val)
2313       PrevElt = std::make_pair(Val, Idx);
2314   }
2315
2316   // We need to have logged a step for this to count as a legal index sequence.
2317   if (!SeqStepNum || !SeqStepDenom)
2318     return std::nullopt;
2319
2320   // Loop back through the sequence and validate elements we might have skipped
2321   // while waiting for a valid step. While doing this, log any sequence addend.
2322 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 2323 if (Op.getOperand(Idx).isUndef()) 2324 continue; 2325 uint64_t Val; 2326 if (IsInteger) { 2327 Val = Op.getConstantOperandVal(Idx) & 2328 maskTrailingOnes<uint64_t>(EltSizeInBits); 2329 } else { 2330 Val = *getExactInteger( 2331 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(), 2332 EltSizeInBits); 2333 } 2334 uint64_t ExpectedVal = 2335 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; 2336 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); 2337 if (!SeqAddend) 2338 SeqAddend = Addend; 2339 else if (Addend != SeqAddend) 2340 return std::nullopt; 2341 } 2342 2343 assert(SeqAddend && "Must have an addend if we have a step"); 2344 2345 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; 2346 } 2347 2348 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT 2349 // and lower it as a VRGATHER_VX_VL from the source vector. 2350 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, 2351 SelectionDAG &DAG, 2352 const RISCVSubtarget &Subtarget) { 2353 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2354 return SDValue(); 2355 SDValue Vec = SplatVal.getOperand(0); 2356 // Only perform this optimization on vectors of the same size for simplicity. 2357 // Don't perform this optimization for i1 vectors. 2358 // FIXME: Support i1 vectors, maybe by promoting to i8? 2359 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1) 2360 return SDValue(); 2361 SDValue Idx = SplatVal.getOperand(1); 2362 // The index must be a legal type. 2363 if (Idx.getValueType() != Subtarget.getXLenVT()) 2364 return SDValue(); 2365 2366 MVT ContainerVT = VT; 2367 if (VT.isFixedLengthVector()) { 2368 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2369 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 2370 } 2371 2372 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2373 2374 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, 2375 Idx, DAG.getUNDEF(ContainerVT), Mask, VL); 2376 2377 if (!VT.isFixedLengthVector()) 2378 return Gather; 2379 2380 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 2381 } 2382 2383 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 2384 const RISCVSubtarget &Subtarget) { 2385 MVT VT = Op.getSimpleValueType(); 2386 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 2387 2388 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2389 2390 SDLoc DL(Op); 2391 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2392 2393 MVT XLenVT = Subtarget.getXLenVT(); 2394 unsigned NumElts = Op.getNumOperands(); 2395 2396 if (VT.getVectorElementType() == MVT::i1) { 2397 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 2398 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 2399 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 2400 } 2401 2402 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 2403 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 2404 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 2405 } 2406 2407 // Lower constant mask BUILD_VECTORs via an integer vector type, in 2408 // scalar integer chunks whose bit-width depends on the number of mask 2409 // bits and XLEN. 2410 // First, determine the most appropriate scalar integer type to use. 
This
2411     // is at most XLenVT, but may be shrunk to a smaller vector element type
2412     // according to the size of the final vector - use i8 chunks rather than
2413     // XLenVT if we're producing a v8i1. This results in more consistent
2414     // codegen across RV32 and RV64.
2415     unsigned NumViaIntegerBits =
2416         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
2417     NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
2418     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
2419       // If we have to use more than one INSERT_VECTOR_ELT then this
2420       // optimization is likely to increase code size; avoid performing it in
2421       // such a case. We can use a load from a constant pool in this case.
2422       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
2423         return SDValue();
2424       // Now we can create our integer vector type. Note that it may be larger
2425       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
2426       MVT IntegerViaVecVT =
2427           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
2428                            divideCeil(NumElts, NumViaIntegerBits));
2429
2430       uint64_t Bits = 0;
2431       unsigned BitPos = 0, IntegerEltIdx = 0;
2432       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
2433
2434       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
2435         // Once we accumulate enough bits to fill our scalar type, insert into
2436         // our vector and clear our accumulated data.
2437         if (I != 0 && I % NumViaIntegerBits == 0) {
2438           if (NumViaIntegerBits <= 32)
2439             Bits = SignExtend64<32>(Bits);
2440           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2441           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
2442                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2443           Bits = 0;
2444           BitPos = 0;
2445           IntegerEltIdx++;
2446         }
2447         SDValue V = Op.getOperand(I);
2448         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2449         Bits |= ((uint64_t)BitValue << BitPos);
2450       }
2451
2452       // Insert the (remaining) scalar value into position in our integer
2453       // vector type.
2454       if (NumViaIntegerBits <= 32)
2455         Bits = SignExtend64<32>(Bits);
2456       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2457       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2458                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2459
2460       if (NumElts < NumViaIntegerBits) {
2461         // If we're producing a smaller vector than our minimum legal integer
2462         // type, bitcast to the equivalent (known-legal) mask type, and extract
2463         // our final mask.
2464         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2465         Vec = DAG.getBitcast(MVT::v8i1, Vec);
2466         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2467                           DAG.getConstant(0, DL, XLenVT));
2468       } else {
2469         // Else we must have produced an integer type with the same size as the
2470         // mask type; bitcast for the final result.
2471         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2472         Vec = DAG.getBitcast(VT, Vec);
2473       }
2474
2475       return Vec;
2476     }
2477
2478     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2479     // vector type, we have a legal equivalently-sized i8 type, so we can use
2480     // that.
2481     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2482     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2483
2484     SDValue WideVec;
2485     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2486       // For a splat, perform a scalar truncate before creating the wider
2487       // vector.
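      // (The splat value arrives as an XLenVT scalar; the AND with 1 below
      // keeps only the i1 bit before it is re-splatted as i8.)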
2488       assert(Splat.getValueType() == XLenVT &&
2489              "Unexpected type for i1 splat value");
2490       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2491                           DAG.getConstant(1, DL, XLenVT));
2492       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2493     } else {
2494       SmallVector<SDValue, 8> Ops(Op->op_values());
2495       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2496       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2497       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2498     }
2499
2500     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2501   }
2502
2503   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2504     if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
2505       return Gather;
2506     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2507                                         : RISCVISD::VMV_V_X_VL;
2508     Splat =
2509         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
2510     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2511   }
2512
2513   // Try to match index sequences, which we can lower to the vid instruction
2514   // with optional modifications. An all-undef vector is matched by
2515   // getSplatValue, above.
2516   if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2517     int64_t StepNumerator = SimpleVID->StepNumerator;
2518     unsigned StepDenominator = SimpleVID->StepDenominator;
2519     int64_t Addend = SimpleVID->Addend;
2520
2521     assert(StepNumerator != 0 && "Invalid step");
2522     bool Negate = false;
2523     int64_t SplatStepVal = StepNumerator;
2524     unsigned StepOpcode = ISD::MUL;
2525     if (StepNumerator != 1) {
2526       if (isPowerOf2_64(std::abs(StepNumerator))) {
2527         Negate = StepNumerator < 0;
2528         StepOpcode = ISD::SHL;
2529         SplatStepVal = Log2_64(std::abs(StepNumerator));
2530       }
2531     }
2532
2533     // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
2534     // threshold since it's the immediate value many RVV instructions accept.
2535     // There is no vmul.vi instruction so ensure the multiply constant can fit
2536     // in a single addi instruction.
2537     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2538          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2539         isPowerOf2_32(StepDenominator) &&
2540         (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
2541       MVT VIDVT =
2542           VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
2543       MVT VIDContainerVT =
2544           getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
2545       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
2546       // Convert right out of the scalable type so we can use standard ISD
2547       // nodes for the rest of the computation. If we used scalable types with
2548       // these, we'd lose the fixed-length vector info and generate worse
2549       // vsetvli code.
2550       VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
2551       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2552           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2553         SDValue SplatStep = DAG.getSplatBuildVector(
2554             VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2555         VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
2556       }
2557       if (StepDenominator != 1) {
2558         SDValue SplatStep = DAG.getSplatBuildVector(
2559             VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2560         VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
2561       }
2562       if (Addend != 0 || Negate) {
2563         SDValue SplatAddend = DAG.getSplatBuildVector(
2564             VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
2565         VID = DAG.getNode(Negate ?
ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
2566                           VID);
2567       }
2568       if (VT.isFloatingPoint()) {
2569         // TODO: Use vfwcvt to reduce register pressure.
2570         VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
2571       }
2572       return VID;
2573     }
2574   }
2575
2576   // Attempt to detect "hidden" splats, which only reveal themselves as splats
2577   // when re-interpreted as a vector with a larger element type. For example,
2578   // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2579   // could instead be splat as
2580   // v2i32 = build_vector i32 0x00010000, i32 0x00010000
2581   // TODO: This optimization could also work on non-constant splats, but it
2582   // would require bit-manipulation instructions to construct the splat value.
2583   SmallVector<SDValue> Sequence;
2584   unsigned EltBitSize = VT.getScalarSizeInBits();
2585   const auto *BV = cast<BuildVectorSDNode>(Op);
2586   if (VT.isInteger() && EltBitSize < 64 &&
2587       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2588       BV->getRepeatedSequence(Sequence) &&
2589       (Sequence.size() * EltBitSize) <= 64) {
2590     unsigned SeqLen = Sequence.size();
2591     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2592     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2593     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2594             ViaIntVT == MVT::i64) &&
2595            "Unexpected sequence type");
2596
2597     unsigned EltIdx = 0;
2598     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2599     uint64_t SplatValue = 0;
2600     // Construct the amalgamated value which can be splatted as this larger
2601     // vector type.
2602     for (const auto &SeqV : Sequence) {
2603       if (!SeqV.isUndef())
2604         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2605                        << (EltIdx * EltBitSize));
2606       EltIdx++;
2607     }
2608
2609     // On RV64, sign-extend from 32 to 64 bits where possible in order to
2610     // achieve better constant materialization.
2611     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2612       SplatValue = SignExtend64<32>(SplatValue);
2613
2614     // Since we can't introduce illegal i64 types at this stage, we can only
2615     // perform an i64 splat on RV32 if it is its own sign-extended value. That
2616     // way we can use RVV instructions to splat.
2617     assert((ViaIntVT.bitsLE(XLenVT) ||
2618             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2619            "Unexpected bitcast sequence");
2620     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2621       SDValue ViaVL =
2622           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2623       MVT ViaContainerVT =
2624           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2625       SDValue Splat =
2626           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2627                       DAG.getUNDEF(ViaContainerVT),
2628                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2629       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2630       return DAG.getBitcast(VT, Splat);
2631     }
2632   }
2633
2634   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2635   // which constitute a large proportion of the elements. In such cases we can
2636   // splat a vector with the dominant element and make up the shortfall with
2637   // INSERT_VECTOR_ELTs.
2638   // Note that this includes vectors of 2 elements by association. The
2639   // upper-most element is the "dominant" one, allowing us to use a splat to
2640   // "insert" the upper element, and an insert of the lower element at position
2641   // 0, which improves codegen.
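  // For example, <i32 a, i32 b, i32 b, i32 b> is lowered as a splat of b
  // followed by a single insert of a at index 0, rather than inserting each
  // element individually.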
2642   SDValue DominantValue;
2643   unsigned MostCommonCount = 0;
2644   DenseMap<SDValue, unsigned> ValueCounts;
2645   unsigned NumUndefElts =
2646       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2647
2648   // Track the number of scalar loads we know we'd be inserting, estimated as
2649   // any non-zero floating-point constant. Other kinds of element are either
2650   // already in registers or are materialized on demand. The threshold at which
2651   // a vector load is more desirable than several scalar materializations and
2652   // vector-insertion instructions is not known.
2653   unsigned NumScalarLoads = 0;
2654
2655   for (SDValue V : Op->op_values()) {
2656     if (V.isUndef())
2657       continue;
2658
2659     ValueCounts.insert(std::make_pair(V, 0));
2660     unsigned &Count = ValueCounts[V];
2661
2662     if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2663       NumScalarLoads += !CFP->isExactlyValue(+0.0);
2664
2665     // Is this value dominant? In case of a tie, prefer the highest element as
2666     // it's cheaper to insert near the beginning of a vector than it is at the
2667     // end.
2668     if (++Count >= MostCommonCount) {
2669       DominantValue = V;
2670       MostCommonCount = Count;
2671     }
2672   }
2673
2674   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2675   unsigned NumDefElts = NumElts - NumUndefElts;
2676   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2677
2678   // Don't perform this optimization when optimizing for size, since
2679   // materializing elements and inserting them tends to cause code bloat.
2680   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2681       ((MostCommonCount > DominantValueCountThreshold) ||
2682        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2683     // Start by splatting the most common element.
2684     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2685
2686     DenseSet<SDValue> Processed{DominantValue};
2687     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2688     for (const auto &OpIdx : enumerate(Op->ops())) {
2689       const SDValue &V = OpIdx.value();
2690       if (V.isUndef() || !Processed.insert(V).second)
2691         continue;
2692       if (ValueCounts[V] == 1) {
2693         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2694                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
2695       } else {
2696         // Blend in all instances of this value using a VSELECT, using a
2697         // mask where each bit signals whether that element is the one
2698         // we're after.
2699         SmallVector<SDValue> Ops;
2700         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2701           return DAG.getConstant(V == V1, DL, XLenVT);
2702         });
2703         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2704                           DAG.getBuildVector(SelMaskTy, DL, Ops),
2705                           DAG.getSplatBuildVector(VT, DL, V), Vec);
2706       }
2707     }
2708
2709     return Vec;
2710   }
2711
2712   return SDValue();
2713 }
2714
2715 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2716                                    SDValue Lo, SDValue Hi, SDValue VL,
2717                                    SelectionDAG &DAG) {
2718   if (!Passthru)
2719     Passthru = DAG.getUNDEF(VT);
2720   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2721     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2722     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2723     // If the Hi constant is the sign-extension of Lo, lower this as a custom
2724     // node in order to try and match RVV vector/scalar instructions.
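    // (For example, splatting the i64 value -5 on RV32 gives Lo = 0xFFFFFFFB
    // and Hi = -1; Hi matches LoC >> 31, so a single vmv.v.x of Lo suffices.)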
2725     if ((LoC >> 31) == HiC)
2726       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
2727
2728     // If VL is VLMAX (the all-ones sentinel) and the Hi constant is equal to
2729     // Lo, we could use vmv.v.x whose EEW = 32 to lower it.
2730     auto *Const = dyn_cast<ConstantSDNode>(VL);
2731     if (LoC == HiC && Const && Const->isAllOnesValue()) {
2732       MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2733       // TODO: if vl <= min(VLMAX), we can also do this. But we could not
2734       // access the subtarget here now.
2735       auto InterVec = DAG.getNode(
2736           RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
2737           DAG.getRegister(RISCV::X0, MVT::i32));
2738       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2739     }
2740   }
2741
2742   // Fall back to a stack store and stride x0 vector load.
2743   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
2744                      Hi, VL);
2745 }
2746
2747 // Called by type legalization to handle splat of i64 on RV32.
2748 // FIXME: We can optimize this when the type has sign or zero bits in one
2749 // of the halves.
2750 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2751                                    SDValue Scalar, SDValue VL,
2752                                    SelectionDAG &DAG) {
2753   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2754   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2755                            DAG.getConstant(0, DL, MVT::i32));
2756   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2757                            DAG.getConstant(1, DL, MVT::i32));
2758   return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
2759 }
2760
2761 // This function lowers a splat of a scalar operand Scalar with the vector
2762 // length VL. It ensures the final sequence is type legal, which is useful when
2763 // lowering a splat after type legalization.
2764 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
2765                                 MVT VT, SDLoc DL, SelectionDAG &DAG,
2766                                 const RISCVSubtarget &Subtarget) {
2767   bool HasPassthru = Passthru && !Passthru.isUndef();
2768   if (!HasPassthru && !Passthru)
2769     Passthru = DAG.getUNDEF(VT);
2770   if (VT.isFloatingPoint()) {
2771     // If VL is 1, we could use vfmv.s.f.
2772     if (isOneConstant(VL))
2773       return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
2774     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
2775   }
2776
2777   MVT XLenVT = Subtarget.getXLenVT();
2778
2779   // Simplest case is that the operand needs to be promoted to XLenVT.
2780   if (Scalar.getValueType().bitsLE(XLenVT)) {
2781     // If the operand is a constant, sign extend to increase our chances
2782     // of being able to use a .vi instruction. ANY_EXTEND would become a
2783     // zero extend and the simm5 check in isel would fail.
2784     // FIXME: Should we ignore the upper bits in isel instead?
2785     unsigned ExtOpc =
2786         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2787     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2788     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2789     // If VL is 1 and the scalar value won't benefit from an immediate, we
2790     // could use vmv.s.x.
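    // (Nonzero constants that do fit in simm5 are left to the VMV_V_X_VL path
    // below, which can be selected as a vmv.v.i.)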
2791     if (isOneConstant(VL) &&
2792         (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2793       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
2794     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2795   }
2796
2797   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2798          "Unexpected scalar for splat lowering!");
2799
2800   if (isOneConstant(VL) && isNullConstant(Scalar))
2801     return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
2802                        DAG.getConstant(0, DL, XLenVT), VL);
2803
2804   // Otherwise use the more complicated splatting algorithm.
2805   return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
2806 }
2807
2808 static MVT getLMUL1VT(MVT VT) {
2809   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
2810          "Unexpected vector MVT");
2811   return MVT::getScalableVectorVT(
2812       VT.getVectorElementType(),
2813       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
2814 }
2815
2816 // This function lowers an insert of a scalar operand Scalar into lane
2817 // 0 of the vector regardless of the value of VL. The contents of the
2818 // remaining lanes of the result vector are unspecified. VL is assumed
2819 // to be non-zero.
2820 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL,
2821                                  MVT VT, SDLoc DL, SelectionDAG &DAG,
2822                                  const RISCVSubtarget &Subtarget) {
2823   const MVT XLenVT = Subtarget.getXLenVT();
2824
2825   SDValue Passthru = DAG.getUNDEF(VT);
2826   if (VT.isFloatingPoint()) {
2827     // TODO: Use vmv.v.i for appropriate constants
2828     // Use M1 or smaller to avoid over-constraining register allocation
2829     const MVT M1VT = getLMUL1VT(VT);
2830     auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
2831     SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
2832                                  DAG.getUNDEF(InnerVT), Scalar, VL);
2833     if (VT != InnerVT)
2834       Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2835                            DAG.getUNDEF(VT),
2836                            Result, DAG.getConstant(0, DL, XLenVT));
2837     return Result;
2838   }
2839
2840
2841   // Avoid the tricky legalization cases by falling back to using the
2842   // splat code which already handles it gracefully.
2843   if (!Scalar.getValueType().bitsLE(XLenVT))
2844     return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
2845                             DAG.getConstant(1, DL, XLenVT),
2846                             VT, DL, DAG, Subtarget);
2847
2848   // If the operand is a constant, sign extend to increase our chances
2849   // of being able to use a .vi instruction. ANY_EXTEND would become a
2850   // zero extend and the simm5 check in isel would fail.
2851   // FIXME: Should we ignore the upper bits in isel instead?
2852   unsigned ExtOpc =
2853       isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2854   Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2855   // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
2856   // higher would involve overly constraining the register allocator for
2857   // no purpose.
2858   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
2859     if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
2860         VT.bitsLE(getLMUL1VT(VT)))
2861       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2862   }
2863   // Use M1 or smaller to avoid over-constraining register allocation
2864   const MVT M1VT = getLMUL1VT(VT);
2865   auto InnerVT = VT.bitsLE(M1VT) ?
VT : M1VT; 2866 SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT, 2867 DAG.getUNDEF(InnerVT), Scalar, VL); 2868 if (VT != InnerVT) 2869 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, 2870 DAG.getUNDEF(VT), 2871 Result, DAG.getConstant(0, DL, XLenVT)); 2872 return Result; 2873 2874 } 2875 2876 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources, 2877 const RISCVSubtarget &Subtarget) { 2878 // We need to be able to widen elements to the next larger integer type. 2879 if (VT.getScalarSizeInBits() >= Subtarget.getELEN()) 2880 return false; 2881 2882 int Size = Mask.size(); 2883 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); 2884 2885 int Srcs[] = {-1, -1}; 2886 for (int i = 0; i != Size; ++i) { 2887 // Ignore undef elements. 2888 if (Mask[i] < 0) 2889 continue; 2890 2891 // Is this an even or odd element. 2892 int Pol = i % 2; 2893 2894 // Ensure we consistently use the same source for this element polarity. 2895 int Src = Mask[i] / Size; 2896 if (Srcs[Pol] < 0) 2897 Srcs[Pol] = Src; 2898 if (Srcs[Pol] != Src) 2899 return false; 2900 2901 // Make sure the element within the source is appropriate for this element 2902 // in the destination. 2903 int Elt = Mask[i] % Size; 2904 if (Elt != i / 2) 2905 return false; 2906 } 2907 2908 // We need to find a source for each polarity and they can't be the same. 2909 if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1]) 2910 return false; 2911 2912 // Swap the sources if the second source was in the even polarity. 2913 SwapSources = Srcs[0] > Srcs[1]; 2914 2915 return true; 2916 } 2917 2918 /// Match shuffles that concatenate two vectors, rotate the concatenation, 2919 /// and then extract the original number of elements from the rotated result. 2920 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The 2921 /// returned rotation amount is for a rotate right, where elements move from 2922 /// higher elements to lower elements. \p LoSrc indicates the first source 2923 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector 2924 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be 2925 /// 0 or 1 if a rotation is found. 2926 /// 2927 /// NOTE: We talk about rotate to the right which matches how bit shift and 2928 /// rotate instructions are described where LSBs are on the right, but LLVM IR 2929 /// and the table below write vectors with the lowest elements on the left. 2930 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) { 2931 int Size = Mask.size(); 2932 2933 // We need to detect various ways of spelling a rotation: 2934 // [11, 12, 13, 14, 15, 0, 1, 2] 2935 // [-1, 12, 13, 14, -1, -1, 1, -1] 2936 // [-1, -1, -1, -1, -1, -1, 1, 2] 2937 // [ 3, 4, 5, 6, 7, 8, 9, 10] 2938 // [-1, 4, 5, 6, -1, -1, 9, -1] 2939 // [-1, 4, 5, 6, -1, -1, -1, -1] 2940 int Rotation = 0; 2941 LoSrc = -1; 2942 HiSrc = -1; 2943 for (int i = 0; i != Size; ++i) { 2944 int M = Mask[i]; 2945 if (M < 0) 2946 continue; 2947 2948 // Determine where a rotate vector would have started. 2949 int StartIdx = i - (M % Size); 2950 // The identity rotation isn't interesting, stop. 2951 if (StartIdx == 0) 2952 return -1; 2953 2954 // If we found the tail of a vector the rotation must be the missing 2955 // front. If we found the head of a vector, it must be how much of the 2956 // head. 2957 int CandidateRotation = StartIdx < 0 ? 
-StartIdx : Size - StartIdx;
2958
2959     if (Rotation == 0)
2960       Rotation = CandidateRotation;
2961     else if (Rotation != CandidateRotation)
2962       // The rotations don't match, so we can't match this mask.
2963       return -1;
2964
2965     // Compute which value this mask is pointing at.
2966     int MaskSrc = M < Size ? 0 : 1;
2967
2968     // Compute which of the two target values this index should be assigned to.
2969     // This reflects whether the high elements are remaining or the low elements
2970     // are remaining.
2971     int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
2972
2973     // Either set up this value if we've not encountered it before, or check
2974     // that it remains consistent.
2975     if (TargetSrc < 0)
2976       TargetSrc = MaskSrc;
2977     else if (TargetSrc != MaskSrc)
2978       // This may be a rotation, but it pulls from the inputs in some
2979       // unsupported interleaving.
2980       return -1;
2981   }
2982
2983   // Check that we successfully analyzed the mask, and normalize the results.
2984   assert(Rotation != 0 && "Failed to locate a viable rotation!");
2985   assert((LoSrc >= 0 || HiSrc >= 0) &&
2986          "Failed to find a rotated input vector!");
2987
2988   return Rotation;
2989 }
2990
2991 // Lower the following shuffles to vnsrl.
2992 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
2993 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
2994 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
2995 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
2996 static SDValue lowerVECTOR_SHUFFLEAsVNSRL(const SDLoc &DL, MVT VT,
2997                                           MVT ContainerVT, SDValue V1,
2998                                           SDValue V2, SDValue TrueMask,
2999                                           SDValue VL, ArrayRef<int> Mask,
3000                                           const RISCVSubtarget &Subtarget,
3001                                           SelectionDAG &DAG) {
3002   // Need to be able to widen the vector.
3003   if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3004     return SDValue();
3005
3006   // Both inputs must be extracts.
3007   if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
3008       V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3009     return SDValue();
3010
3011   // Both must be extracting from the same source.
3012   SDValue Src = V1.getOperand(0);
3013   if (Src != V2.getOperand(0))
3014     return SDValue();
3015
3016   // Src needs to have twice the number of elements.
3017   if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
3018     return SDValue();
3019
3020   // The extracts must extract the two halves of the source.
3021   if (V1.getConstantOperandVal(1) != 0 ||
3022       V2.getConstantOperandVal(1) != Mask.size())
3023     return SDValue();
3024
3025   // First index must be the first even or odd element from V1.
3026   if (Mask[0] != 0 && Mask[0] != 1)
3027     return SDValue();
3028
3029   // The others must increase by 2 each time.
3030   // TODO: Support undef elements?
3031   for (unsigned i = 1; i != Mask.size(); ++i)
3032     if (Mask[i] != Mask[i - 1] + 2)
3033       return SDValue();
3034
3035   // Convert the source using a container type with twice the elements. Since
3036   // the source VT is legal and has twice the elements of this VT, we know VT
3037   // isn't LMUL=8, so it is safe to double.
3038   MVT DoubleContainerVT =
3039       MVT::getVectorVT(ContainerVT.getVectorElementType(),
3040                        ContainerVT.getVectorElementCount() * 2);
3041   Src = convertToScalableVector(DoubleContainerVT, Src, DAG, Subtarget);
3042
3043   // Convert the vector to a wider integer type with the original element
3044   // count. This also converts FP to int.
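  // (For example, assuming MinVLen=128, a v8i8 result uses ContainerVT nxv4i8;
  // the v16i8 source becomes nxv8i8 and is bitcast to nxv4i16 so that the
  // vnsrl by 0 or 8 bits below keeps the even or odd bytes respectively.)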
3045 unsigned EltBits = ContainerVT.getScalarSizeInBits(); 3046 MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2); 3047 MVT WideIntContainerVT = 3048 MVT::getVectorVT(WideIntEltVT, ContainerVT.getVectorElementCount()); 3049 Src = DAG.getBitcast(WideIntContainerVT, Src); 3050 3051 // Convert to the integer version of the container type. 3052 MVT IntEltVT = MVT::getIntegerVT(EltBits); 3053 MVT IntContainerVT = 3054 MVT::getVectorVT(IntEltVT, ContainerVT.getVectorElementCount()); 3055 3056 // If we want even elements, then the shift amount is 0. Otherwise, shift by 3057 // the original element size. 3058 unsigned Shift = Mask[0] == 0 ? 0 : EltBits; 3059 SDValue SplatShift = DAG.getNode( 3060 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT), 3061 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL); 3062 SDValue Res = 3063 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift, 3064 DAG.getUNDEF(IntContainerVT), TrueMask, VL); 3065 // Cast back to FP if needed. 3066 Res = DAG.getBitcast(ContainerVT, Res); 3067 3068 return convertFromScalableVector(VT, Res, DAG, Subtarget); 3069 } 3070 3071 static SDValue 3072 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, SDLoc DL, 3073 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, 3074 SDValue VL, 3075 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3076 if (Merge.isUndef()) 3077 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3078 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3079 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3080 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops); 3081 } 3082 3083 static SDValue 3084 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, SDLoc DL, 3085 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, 3086 SDValue VL, 3087 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) { 3088 if (Merge.isUndef()) 3089 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC; 3090 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT()); 3091 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp}; 3092 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops); 3093 } 3094 3095 // Lower the following shuffle to vslidedown. 3096 // a) 3097 // t49: v8i8 = extract_subvector t13, Constant:i64<0> 3098 // t109: v8i8 = extract_subvector t13, Constant:i64<8> 3099 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106 3100 // b) 3101 // t69: v16i16 = extract_subvector t68, Constant:i64<0> 3102 // t23: v8i16 = extract_subvector t69, Constant:i64<0> 3103 // t29: v4i16 = extract_subvector t23, Constant:i64<4> 3104 // t26: v8i16 = extract_subvector t69, Constant:i64<8> 3105 // t30: v4i16 = extract_subvector t26, Constant:i64<0> 3106 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30 3107 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, 3108 SDValue V1, SDValue V2, 3109 ArrayRef<int> Mask, 3110 const RISCVSubtarget &Subtarget, 3111 SelectionDAG &DAG) { 3112 auto findNonEXTRACT_SUBVECTORParent = 3113 [](SDValue Parent) -> std::pair<SDValue, uint64_t> { 3114 uint64_t Offset = 0; 3115 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR && 3116 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from 3117 // a scalable vector. But we don't want to match the case. 
3118 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) { 3119 Offset += Parent.getConstantOperandVal(1); 3120 Parent = Parent.getOperand(0); 3121 } 3122 return std::make_pair(Parent, Offset); 3123 }; 3124 3125 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1); 3126 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2); 3127 3128 // Extracting from the same source. 3129 SDValue Src = V1Src; 3130 if (Src != V2Src) 3131 return SDValue(); 3132 3133 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs. 3134 SmallVector<int, 16> NewMask(Mask); 3135 for (size_t i = 0; i != NewMask.size(); ++i) { 3136 if (NewMask[i] == -1) 3137 continue; 3138 3139 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) { 3140 NewMask[i] = NewMask[i] + V1IndexOffset; 3141 } else { 3142 // Minus NewMask.size() is needed. Otherwise, the b case would be 3143 // <5,6,7,12> instead of <5,6,7,8>. 3144 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset; 3145 } 3146 } 3147 3148 // First index must be known and non-zero. It will be used as the slidedown 3149 // amount. 3150 if (NewMask[0] <= 0) 3151 return SDValue(); 3152 3153 // NewMask is also continuous. 3154 for (unsigned i = 1; i != NewMask.size(); ++i) 3155 if (NewMask[i - 1] + 1 != NewMask[i]) 3156 return SDValue(); 3157 3158 MVT XLenVT = Subtarget.getXLenVT(); 3159 MVT SrcVT = Src.getSimpleValueType(); 3160 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 3161 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 3162 SDValue Slidedown = 3163 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 3164 convertToScalableVector(ContainerVT, Src, DAG, Subtarget), 3165 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL); 3166 return DAG.getNode( 3167 ISD::EXTRACT_SUBVECTOR, DL, VT, 3168 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 3169 DAG.getConstant(0, DL, XLenVT)); 3170 } 3171 3172 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, 3173 const RISCVSubtarget &Subtarget) { 3174 SDValue V1 = Op.getOperand(0); 3175 SDValue V2 = Op.getOperand(1); 3176 SDLoc DL(Op); 3177 MVT XLenVT = Subtarget.getXLenVT(); 3178 MVT VT = Op.getSimpleValueType(); 3179 unsigned NumElts = VT.getVectorNumElements(); 3180 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 3181 3182 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 3183 3184 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3185 3186 if (SVN->isSplat()) { 3187 const int Lane = SVN->getSplatIndex(); 3188 if (Lane >= 0) { 3189 MVT SVT = VT.getVectorElementType(); 3190 3191 // Turn splatted vector load into a strided load with an X0 stride. 3192 SDValue V = V1; 3193 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector 3194 // with undef. 3195 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 3196 int Offset = Lane; 3197 if (V.getOpcode() == ISD::CONCAT_VECTORS) { 3198 int OpElements = 3199 V.getOperand(0).getSimpleValueType().getVectorNumElements(); 3200 V = V.getOperand(Offset / OpElements); 3201 Offset %= OpElements; 3202 } 3203 3204 // We need to ensure the load isn't atomic or volatile. 
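// Illustrative example of this path (names are invented): splatting lane 5 of
// (concat_vectors (v4i32 load [A]), (v4i32 load [B])) peeks through to the
// second load with Offset 5 % 4 == 1, i.e. a byte offset of 4 from B. A
// simple, non-extending load is then rebuilt below as a scalar load from B+4
// that gets splatted (or, for SEW=64 on RV32, an x0-strided vlse from B+4).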
3205 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { 3206 auto *Ld = cast<LoadSDNode>(V); 3207 Offset *= SVT.getStoreSize(); 3208 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), 3209 TypeSize::Fixed(Offset), DL); 3210 3211 // If this is SEW=64 on RV32, use a strided load with a stride of x0. 3212 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { 3213 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 3214 SDValue IntID = 3215 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); 3216 SDValue Ops[] = {Ld->getChain(), 3217 IntID, 3218 DAG.getUNDEF(ContainerVT), 3219 NewAddr, 3220 DAG.getRegister(RISCV::X0, XLenVT), 3221 VL}; 3222 SDValue NewLoad = DAG.getMemIntrinsicNode( 3223 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, 3224 DAG.getMachineFunction().getMachineMemOperand( 3225 Ld->getMemOperand(), Offset, SVT.getStoreSize())); 3226 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); 3227 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 3228 } 3229 3230 // Otherwise use a scalar load and splat. This will give the best 3231 // opportunity to fold a splat into the operation. ISel can turn it into 3232 // the x0 strided load if we aren't able to fold away the select. 3233 if (SVT.isFloatingPoint()) 3234 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, 3235 Ld->getPointerInfo().getWithOffset(Offset), 3236 Ld->getOriginalAlign(), 3237 Ld->getMemOperand()->getFlags()); 3238 else 3239 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, 3240 Ld->getPointerInfo().getWithOffset(Offset), SVT, 3241 Ld->getOriginalAlign(), 3242 Ld->getMemOperand()->getFlags()); 3243 DAG.makeEquivalentMemoryOrdering(Ld, V); 3244 3245 unsigned Opc = 3246 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; 3247 SDValue Splat = 3248 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL); 3249 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 3250 } 3251 3252 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 3253 assert(Lane < (int)NumElts && "Unexpected lane!"); 3254 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, 3255 V1, DAG.getConstant(Lane, DL, XLenVT), 3256 DAG.getUNDEF(ContainerVT), TrueMask, VL); 3257 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 3258 } 3259 } 3260 3261 ArrayRef<int> Mask = SVN->getMask(); 3262 3263 if (SDValue V = 3264 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG)) 3265 return V; 3266 3267 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may 3268 // be undef which can be handled with a single SLIDEDOWN/UP. 3269 int LoSrc, HiSrc; 3270 int Rotation = isElementRotate(LoSrc, HiSrc, Mask); 3271 if (Rotation > 0) { 3272 SDValue LoV, HiV; 3273 if (LoSrc >= 0) { 3274 LoV = LoSrc == 0 ? V1 : V2; 3275 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget); 3276 } 3277 if (HiSrc >= 0) { 3278 HiV = HiSrc == 0 ? V1 : V2; 3279 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget); 3280 } 3281 3282 // We found a rotation. We need to slide HiV down by Rotation. Then we need 3283 // to slide LoV up by (NumElts - Rotation). 3284 unsigned InvRotate = NumElts - Rotation; 3285 3286 SDValue Res = DAG.getUNDEF(ContainerVT); 3287 if (HiV) { 3288 // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN. 
3289 // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it 3290 // causes multiple vsetvlis in some test cases such as lowering 3291 // reduce.mul 3292 SDValue DownVL = VL; 3293 if (LoV) 3294 DownVL = DAG.getConstant(InvRotate, DL, XLenVT); 3295 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV, 3296 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, 3297 DownVL); 3298 } 3299 if (LoV) 3300 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV, 3301 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL, 3302 RISCVII::TAIL_AGNOSTIC); 3303 3304 return convertFromScalableVector(VT, Res, DAG, Subtarget); 3305 } 3306 3307 if (SDValue V = lowerVECTOR_SHUFFLEAsVNSRL( 3308 DL, VT, ContainerVT, V1, V2, TrueMask, VL, Mask, Subtarget, DAG)) 3309 return V; 3310 3311 // Detect an interleave shuffle and lower to 3312 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1)) 3313 bool SwapSources; 3314 if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) { 3315 // Swap sources if needed. 3316 if (SwapSources) 3317 std::swap(V1, V2); 3318 3319 // Extract the lower half of the vectors. 3320 MVT HalfVT = VT.getHalfNumVectorElementsVT(); 3321 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, 3322 DAG.getConstant(0, DL, XLenVT)); 3323 V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2, 3324 DAG.getConstant(0, DL, XLenVT)); 3325 3326 // Double the element width and halve the number of elements in an int type. 3327 unsigned EltBits = VT.getScalarSizeInBits(); 3328 MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2); 3329 MVT WideIntVT = 3330 MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2); 3331 // Convert this to a scalable vector. We need to base this on the 3332 // destination size to ensure there's always a type with a smaller LMUL. 3333 MVT WideIntContainerVT = 3334 getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget); 3335 3336 // Convert sources to scalable vectors with the same element count as the 3337 // larger type. 3338 MVT HalfContainerVT = MVT::getVectorVT( 3339 VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount()); 3340 V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget); 3341 V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget); 3342 3343 // Cast sources to integer. 3344 MVT IntEltVT = MVT::getIntegerVT(EltBits); 3345 MVT IntHalfVT = 3346 MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount()); 3347 V1 = DAG.getBitcast(IntHalfVT, V1); 3348 V2 = DAG.getBitcast(IntHalfVT, V2); 3349 3350 // Freeze V2 since we use it twice and we need to be sure that the add and 3351 // multiply see the same value. 3352 V2 = DAG.getFreeze(V2); 3353 3354 // Recreate TrueMask using the widened type's element count. 3355 TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG); 3356 3357 // Widen V1 and V2 with 0s and add one copy of V2 to V1. 3358 SDValue Add = 3359 DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1, V2, 3360 DAG.getUNDEF(WideIntContainerVT), TrueMask, VL); 3361 // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer. 3362 SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT, 3363 DAG.getUNDEF(IntHalfVT), 3364 DAG.getAllOnesConstant(DL, XLenVT), VL); 3365 SDValue WidenMul = 3366 DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT, V2, Multiplier, 3367 DAG.getUNDEF(WideIntContainerVT), TrueMask, VL); 3368 // Add the new copies to our previous addition giving us 2^eltbits copies of 3369 // V2. 
This is equivalent to shifting V2 left by eltbits. This should 3370 // combine with the vwmulu.vv above to form vwmaccu.vv. 3371 Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul, 3372 DAG.getUNDEF(WideIntContainerVT), TrueMask, VL); 3373 // Cast back to ContainerVT. We need to re-create a new ContainerVT in case 3374 // WideIntContainerVT is a larger fractional LMUL than implied by the fixed 3375 // vector VT. 3376 ContainerVT = 3377 MVT::getVectorVT(VT.getVectorElementType(), 3378 WideIntContainerVT.getVectorElementCount() * 2); 3379 Add = DAG.getBitcast(ContainerVT, Add); 3380 return convertFromScalableVector(VT, Add, DAG, Subtarget); 3381 } 3382 3383 // Detect shuffles which can be re-expressed as vector selects; these are 3384 // shuffles in which each element in the destination is taken from an element 3385 // at the corresponding index in either source vectors. 3386 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) { 3387 int MaskIndex = MaskIdx.value(); 3388 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; 3389 }); 3390 3391 assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); 3392 3393 SmallVector<SDValue> MaskVals; 3394 // As a backup, shuffles can be lowered via a vrgather instruction, possibly 3395 // merged with a second vrgather. 3396 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; 3397 3398 // By default we preserve the original operand order, and use a mask to 3399 // select LHS as true and RHS as false. However, since RVV vector selects may 3400 // feature splats but only on the LHS, we may choose to invert our mask and 3401 // instead select between RHS and LHS. 3402 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); 3403 bool InvertMask = IsSelect == SwapOps; 3404 3405 // Keep a track of which non-undef indices are used by each LHS/RHS shuffle 3406 // half. 3407 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; 3408 3409 // Now construct the mask that will be used by the vselect or blended 3410 // vrgather operation. For vrgathers, construct the appropriate indices into 3411 // each vector. 3412 for (int MaskIndex : Mask) { 3413 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; 3414 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 3415 if (!IsSelect) { 3416 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; 3417 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 3418 ? DAG.getConstant(MaskIndex, DL, XLenVT) 3419 : DAG.getUNDEF(XLenVT)); 3420 GatherIndicesRHS.push_back( 3421 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) 3422 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); 3423 if (IsLHSOrUndefIndex && MaskIndex >= 0) 3424 ++LHSIndexCounts[MaskIndex]; 3425 if (!IsLHSOrUndefIndex) 3426 ++RHSIndexCounts[MaskIndex - NumElts]; 3427 } 3428 } 3429 3430 if (SwapOps) { 3431 std::swap(V1, V2); 3432 std::swap(GatherIndicesLHS, GatherIndicesRHS); 3433 } 3434 3435 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); 3436 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 3437 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 3438 3439 if (IsSelect) 3440 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); 3441 3442 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { 3443 // On such a large vector we're unable to use i8 as the index type. 
3444 // FIXME: We could promote the index to i16 and use vrgatherei16, but that 3445 // may involve vector splitting if we're already at LMUL=8, or our 3446 // user-supplied maximum fixed-length LMUL. 3447 return SDValue(); 3448 } 3449 3450 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; 3451 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; 3452 MVT IndexVT = VT.changeTypeToInteger(); 3453 // Since we can't introduce illegal index types at this stage, use i16 and 3454 // vrgatherei16 if the corresponding index type for plain vrgather is greater 3455 // than XLenVT. 3456 if (IndexVT.getScalarType().bitsGT(XLenVT)) { 3457 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; 3458 IndexVT = IndexVT.changeVectorElementType(MVT::i16); 3459 } 3460 3461 MVT IndexContainerVT = 3462 ContainerVT.changeVectorElementType(IndexVT.getScalarType()); 3463 3464 SDValue Gather; 3465 // TODO: This doesn't trigger for i64 vectors on RV32, since there we 3466 // encounter a bitcasted BUILD_VECTOR with low/high i32 values. 3467 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { 3468 Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, 3469 Subtarget); 3470 } else { 3471 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 3472 // If only one index is used, we can use a "splat" vrgather. 3473 // TODO: We can splat the most-common index and fix-up any stragglers, if 3474 // that's beneficial. 3475 if (LHSIndexCounts.size() == 1) { 3476 int SplatIndex = LHSIndexCounts.begin()->getFirst(); 3477 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, 3478 DAG.getConstant(SplatIndex, DL, XLenVT), 3479 DAG.getUNDEF(ContainerVT), TrueMask, VL); 3480 } else { 3481 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); 3482 LHSIndices = 3483 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); 3484 3485 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, 3486 DAG.getUNDEF(ContainerVT), TrueMask, VL); 3487 } 3488 } 3489 3490 // If a second vector operand is used by this shuffle, blend it in with an 3491 // additional vrgather. 3492 if (!V2.isUndef()) { 3493 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 3494 3495 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 3496 SelectMask = 3497 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); 3498 3499 // If only one index is used, we can use a "splat" vrgather. 3500 // TODO: We can splat the most-common index and fix-up any stragglers, if 3501 // that's beneficial. 3502 if (RHSIndexCounts.size() == 1) { 3503 int SplatIndex = RHSIndexCounts.begin()->getFirst(); 3504 Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, 3505 DAG.getConstant(SplatIndex, DL, XLenVT), Gather, 3506 SelectMask, VL); 3507 } else { 3508 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); 3509 RHSIndices = 3510 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); 3511 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, 3512 SelectMask, VL); 3513 } 3514 } 3515 3516 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 3517 } 3518 3519 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { 3520 // Support splats for any type. These should type legalize well. 3521 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) 3522 return true; 3523 3524 // Only support legal VTs for other shuffles for now. 
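// Besides splats, the checks below also report element rotates and
// interleaves as legal. Illustrative v8i16 examples: <6,7,0,1,2,3,4,5> is an
// element rotate (matched by isElementRotate), and <0,8,1,9,2,10,3,11>
// interleaves the low halves of the two sources (matched by
// isInterleaveShuffle).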
3525 if (!isTypeLegal(VT)) 3526 return false; 3527 3528 MVT SVT = VT.getSimpleVT(); 3529 3530 bool SwapSources; 3531 int LoSrc, HiSrc; 3532 return (isElementRotate(LoSrc, HiSrc, M) > 0) || 3533 isInterleaveShuffle(M, SVT, SwapSources, Subtarget); 3534 } 3535 3536 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 3537 // the exponent. 3538 SDValue 3539 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, 3540 SelectionDAG &DAG) const { 3541 MVT VT = Op.getSimpleValueType(); 3542 unsigned EltSize = VT.getScalarSizeInBits(); 3543 SDValue Src = Op.getOperand(0); 3544 SDLoc DL(Op); 3545 3546 // We choose FP type that can represent the value if possible. Otherwise, we 3547 // use rounding to zero conversion for correct exponent of the result. 3548 // TODO: Use f16 for i8 when possible? 3549 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32; 3550 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()))) 3551 FloatEltVT = MVT::f32; 3552 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 3553 3554 // Legal types should have been checked in the RISCVTargetLowering 3555 // constructor. 3556 // TODO: Splitting may make sense in some cases. 3557 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && 3558 "Expected legal float type!"); 3559 3560 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 3561 // The trailing zero count is equal to log2 of this single bit value. 3562 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 3563 SDValue Neg = DAG.getNegative(Src, DL, VT); 3564 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); 3565 } 3566 3567 // We have a legal FP type, convert to it. 3568 SDValue FloatVal; 3569 if (FloatVT.bitsGT(VT)) { 3570 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); 3571 } else { 3572 // Use RTZ to avoid rounding influencing exponent of FloatVal. 3573 MVT ContainerVT = VT; 3574 if (VT.isFixedLengthVector()) { 3575 ContainerVT = getContainerForFixedLengthVector(VT); 3576 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 3577 } 3578 3579 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3580 SDValue RTZRM = 3581 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()); 3582 MVT ContainerFloatVT = 3583 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount()); 3584 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT, 3585 Src, Mask, RTZRM, VL); 3586 if (VT.isFixedLengthVector()) 3587 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget); 3588 } 3589 // Bitcast to integer and shift the exponent to the LSB. 3590 EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); 3591 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); 3592 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; 3593 SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, 3594 DAG.getConstant(ShiftAmt, DL, IntVT)); 3595 // Restore back to original type. Truncation after SRL is to generate vnsrl. 3596 if (IntVT.bitsLT(VT)) 3597 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp); 3598 else if (IntVT.bitsGT(VT)) 3599 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp); 3600 // The exponent contains log2 of the value in biased form. 3601 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; 3602 3603 // For trailing zeros, we just need to subtract the bias. 
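// Worked example (illustrative) for an i16 element holding 8, where f32 is
// used (ShiftAmt = 23, ExponentBias = 127): for CTTZ, 8 & -8 == 8 converts to
// 8.0f, whose biased exponent field is 130, giving 130 - 127 = 3 trailing
// zeros. For CTLZ, Exp is likewise 130 and (127 + 15) - 130 = 12 leading
// zeros, which is correct for the 16-bit value 0x0008.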
3604 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) 3605 return DAG.getNode(ISD::SUB, DL, VT, Exp, 3606 DAG.getConstant(ExponentBias, DL, VT)); 3607 3608 // For leading zeros, we need to remove the bias and convert from log2 to 3609 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 3610 unsigned Adjust = ExponentBias + (EltSize - 1); 3611 SDValue Res = 3612 DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp); 3613 // The above result with zero input equals to Adjust which is greater than 3614 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ. 3615 if (Op.getOpcode() == ISD::CTLZ) 3616 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT)); 3617 return Res; 3618 } 3619 3620 // While RVV has alignment restrictions, we should always be able to load as a 3621 // legal equivalently-sized byte-typed vector instead. This method is 3622 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 3623 // the load is already correctly-aligned, it returns SDValue(). 3624 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, 3625 SelectionDAG &DAG) const { 3626 auto *Load = cast<LoadSDNode>(Op); 3627 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); 3628 3629 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 3630 Load->getMemoryVT(), 3631 *Load->getMemOperand())) 3632 return SDValue(); 3633 3634 SDLoc DL(Op); 3635 MVT VT = Op.getSimpleValueType(); 3636 unsigned EltSizeBits = VT.getScalarSizeInBits(); 3637 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 3638 "Unexpected unaligned RVV load type"); 3639 MVT NewVT = 3640 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 3641 assert(NewVT.isValid() && 3642 "Expecting equally-sized RVV vector types to be legal"); 3643 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), 3644 Load->getPointerInfo(), Load->getOriginalAlign(), 3645 Load->getMemOperand()->getFlags()); 3646 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); 3647 } 3648 3649 // While RVV has alignment restrictions, we should always be able to store as a 3650 // legal equivalently-sized byte-typed vector instead. This method is 3651 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It 3652 // returns SDValue() if the store is already correctly aligned. 
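// For example (illustrative), a byte-aligned store of <vscale x 2 x i32> is
// rewritten as a store of the value bitcast to <vscale x 8 x i8>, which is
// naturally aligned for its element type.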
3653 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, 3654 SelectionDAG &DAG) const { 3655 auto *Store = cast<StoreSDNode>(Op); 3656 assert(Store && Store->getValue().getValueType().isVector() && 3657 "Expected vector store"); 3658 3659 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 3660 Store->getMemoryVT(), 3661 *Store->getMemOperand())) 3662 return SDValue(); 3663 3664 SDLoc DL(Op); 3665 SDValue StoredVal = Store->getValue(); 3666 MVT VT = StoredVal.getSimpleValueType(); 3667 unsigned EltSizeBits = VT.getScalarSizeInBits(); 3668 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 3669 "Unexpected unaligned RVV store type"); 3670 MVT NewVT = 3671 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 3672 assert(NewVT.isValid() && 3673 "Expecting equally-sized RVV vector types to be legal"); 3674 StoredVal = DAG.getBitcast(NewVT, StoredVal); 3675 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), 3676 Store->getPointerInfo(), Store->getOriginalAlign(), 3677 Store->getMemOperand()->getFlags()); 3678 } 3679 3680 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, 3681 const RISCVSubtarget &Subtarget) { 3682 assert(Op.getValueType() == MVT::i64 && "Unexpected VT"); 3683 3684 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue(); 3685 3686 // All simm32 constants should be handled by isel. 3687 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making 3688 // this check redundant, but small immediates are common so this check 3689 // should have better compile time. 3690 if (isInt<32>(Imm)) 3691 return Op; 3692 3693 // We only need to cost the immediate, if constant pool lowering is enabled. 3694 if (!Subtarget.useConstantPoolForLargeInts()) 3695 return Op; 3696 3697 RISCVMatInt::InstSeq Seq = 3698 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 3699 if (Seq.size() <= Subtarget.getMaxBuildIntsCost()) 3700 return Op; 3701 3702 // Expand to a constant pool using the default expansion code. 3703 return SDValue(); 3704 } 3705 3706 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) { 3707 SDLoc dl(Op); 3708 SyncScope::ID FenceSSID = 3709 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 3710 3711 // singlethread fences only synchronize with signal handlers on the same 3712 // thread and thus only need to preserve instruction order, not actually 3713 // enforce memory ordering. 3714 if (FenceSSID == SyncScope::SingleThread) 3715 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
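// For example (illustrative), IR such as
//   fence syncscope("singlethread") seq_cst
// takes this path and emits no instruction, whereas a plain `fence seq_cst`
// falls through below and is selected to a hardware fence.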
3716 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); 3717 3718 return Op; 3719 } 3720 3721 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 3722 SelectionDAG &DAG) const { 3723 switch (Op.getOpcode()) { 3724 default: 3725 report_fatal_error("unimplemented operand"); 3726 case ISD::ATOMIC_FENCE: 3727 return LowerATOMIC_FENCE(Op, DAG); 3728 case ISD::GlobalAddress: 3729 return lowerGlobalAddress(Op, DAG); 3730 case ISD::BlockAddress: 3731 return lowerBlockAddress(Op, DAG); 3732 case ISD::ConstantPool: 3733 return lowerConstantPool(Op, DAG); 3734 case ISD::JumpTable: 3735 return lowerJumpTable(Op, DAG); 3736 case ISD::GlobalTLSAddress: 3737 return lowerGlobalTLSAddress(Op, DAG); 3738 case ISD::Constant: 3739 return lowerConstant(Op, DAG, Subtarget); 3740 case ISD::SELECT: 3741 return lowerSELECT(Op, DAG); 3742 case ISD::BRCOND: 3743 return lowerBRCOND(Op, DAG); 3744 case ISD::VASTART: 3745 return lowerVASTART(Op, DAG); 3746 case ISD::FRAMEADDR: 3747 return lowerFRAMEADDR(Op, DAG); 3748 case ISD::RETURNADDR: 3749 return lowerRETURNADDR(Op, DAG); 3750 case ISD::SHL_PARTS: 3751 return lowerShiftLeftParts(Op, DAG); 3752 case ISD::SRA_PARTS: 3753 return lowerShiftRightParts(Op, DAG, true); 3754 case ISD::SRL_PARTS: 3755 return lowerShiftRightParts(Op, DAG, false); 3756 case ISD::BITCAST: { 3757 SDLoc DL(Op); 3758 EVT VT = Op.getValueType(); 3759 SDValue Op0 = Op.getOperand(0); 3760 EVT Op0VT = Op0.getValueType(); 3761 MVT XLenVT = Subtarget.getXLenVT(); 3762 if (VT == MVT::f16 && Op0VT == MVT::i16 && 3763 Subtarget.hasStdExtZfhOrZfhmin()) { 3764 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 3765 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 3766 return FPConv; 3767 } 3768 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && 3769 Subtarget.hasStdExtF()) { 3770 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 3771 SDValue FPConv = 3772 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 3773 return FPConv; 3774 } 3775 3776 // Consider other scalar<->scalar casts as legal if the types are legal. 3777 // Otherwise expand them. 3778 if (!VT.isVector() && !Op0VT.isVector()) { 3779 if (isTypeLegal(VT) && isTypeLegal(Op0VT)) 3780 return Op; 3781 return SDValue(); 3782 } 3783 3784 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() && 3785 "Unexpected types"); 3786 3787 if (VT.isFixedLengthVector()) { 3788 // We can handle fixed length vector bitcasts with a simple replacement 3789 // in isel. 3790 if (Op0VT.isFixedLengthVector()) 3791 return Op; 3792 // When bitcasting from scalar to fixed-length vector, insert the scalar 3793 // into a one-element vector of the result type, and perform a vector 3794 // bitcast. 3795 if (!Op0VT.isVector()) { 3796 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); 3797 if (!isTypeLegal(BVT)) 3798 return SDValue(); 3799 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, 3800 DAG.getUNDEF(BVT), Op0, 3801 DAG.getConstant(0, DL, XLenVT))); 3802 } 3803 return SDValue(); 3804 } 3805 // Custom-legalize bitcasts from fixed-length vector types to scalar types 3806 // thus: bitcast the vector to a one-element vector type whose element type 3807 // is the same as the result type, and extract the first element. 
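// For example (illustrative, assuming v1i64 is legal here):
//   (i64 (bitcast (v4i16 X)))
// becomes
//   (extractelt (v1i64 (bitcast (v4i16 X))), 0)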
3808 if (!VT.isVector() && Op0VT.isFixedLengthVector()) { 3809 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 3810 if (!isTypeLegal(BVT)) 3811 return SDValue(); 3812 SDValue BVec = DAG.getBitcast(BVT, Op0); 3813 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 3814 DAG.getConstant(0, DL, XLenVT)); 3815 } 3816 return SDValue(); 3817 } 3818 case ISD::INTRINSIC_WO_CHAIN: 3819 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 3820 case ISD::INTRINSIC_W_CHAIN: 3821 return LowerINTRINSIC_W_CHAIN(Op, DAG); 3822 case ISD::INTRINSIC_VOID: 3823 return LowerINTRINSIC_VOID(Op, DAG); 3824 case ISD::BITREVERSE: { 3825 MVT VT = Op.getSimpleValueType(); 3826 SDLoc DL(Op); 3827 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization"); 3828 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode"); 3829 // Expand bitreverse to a bswap(rev8) followed by brev8. 3830 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0)); 3831 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap); 3832 } 3833 case ISD::TRUNCATE: 3834 // Only custom-lower vector truncates 3835 if (!Op.getSimpleValueType().isVector()) 3836 return Op; 3837 return lowerVectorTruncLike(Op, DAG); 3838 case ISD::ANY_EXTEND: 3839 case ISD::ZERO_EXTEND: 3840 if (Op.getOperand(0).getValueType().isVector() && 3841 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 3842 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 3843 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 3844 case ISD::SIGN_EXTEND: 3845 if (Op.getOperand(0).getValueType().isVector() && 3846 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 3847 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 3848 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 3849 case ISD::SPLAT_VECTOR_PARTS: 3850 return lowerSPLAT_VECTOR_PARTS(Op, DAG); 3851 case ISD::INSERT_VECTOR_ELT: 3852 return lowerINSERT_VECTOR_ELT(Op, DAG); 3853 case ISD::EXTRACT_VECTOR_ELT: 3854 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 3855 case ISD::VSCALE: { 3856 MVT VT = Op.getSimpleValueType(); 3857 SDLoc DL(Op); 3858 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); 3859 // We define our scalable vector types for lmul=1 to use a 64 bit known 3860 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 3861 // vscale as VLENB / 8. 3862 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); 3863 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock) 3864 report_fatal_error("Support for VLEN==32 is incomplete."); 3865 // We assume VLENB is a multiple of 8. We manually choose the best shift 3866 // here because SimplifyDemandedBits isn't always able to simplify it. 3867 uint64_t Val = Op.getConstantOperandVal(0); 3868 if (isPowerOf2_64(Val)) { 3869 uint64_t Log2 = Log2_64(Val); 3870 if (Log2 < 3) 3871 return DAG.getNode(ISD::SRL, DL, VT, VLENB, 3872 DAG.getConstant(3 - Log2, DL, VT)); 3873 if (Log2 > 3) 3874 return DAG.getNode(ISD::SHL, DL, VT, VLENB, 3875 DAG.getConstant(Log2 - 3, DL, VT)); 3876 return VLENB; 3877 } 3878 // If the multiplier is a multiple of 8, scale it down to avoid needing 3879 // to shift the VLENB value. 
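// Illustrative examples: (vscale x 24) becomes (mul VLENB, 3) here, while
// (vscale x 6) is not a multiple of 8 and becomes (mul (srl VLENB, 3), 6)
// below. Powers of two such as (vscale x 16) were already handled above as
// (shl VLENB, 1).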
3880 if ((Val % 8) == 0) 3881 return DAG.getNode(ISD::MUL, DL, VT, VLENB, 3882 DAG.getConstant(Val / 8, DL, VT)); 3883 3884 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 3885 DAG.getConstant(3, DL, VT)); 3886 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 3887 } 3888 case ISD::FPOWI: { 3889 // Custom promote f16 powi with illegal i32 integer type on RV64. Once 3890 // promoted this will be legalized into a libcall by LegalizeIntegerTypes. 3891 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && 3892 Op.getOperand(1).getValueType() == MVT::i32) { 3893 SDLoc DL(Op); 3894 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 3895 SDValue Powi = 3896 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); 3897 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, 3898 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); 3899 } 3900 return SDValue(); 3901 } 3902 case ISD::FP_EXTEND: 3903 case ISD::FP_ROUND: 3904 if (!Op.getValueType().isVector()) 3905 return Op; 3906 return lowerVectorFPExtendOrRoundLike(Op, DAG); 3907 case ISD::FP_TO_SINT: 3908 case ISD::FP_TO_UINT: 3909 case ISD::SINT_TO_FP: 3910 case ISD::UINT_TO_FP: { 3911 // RVV can only do fp<->int conversions to types half/double the size as 3912 // the source. We custom-lower any conversions that do two hops into 3913 // sequences. 3914 MVT VT = Op.getSimpleValueType(); 3915 if (!VT.isVector()) 3916 return Op; 3917 SDLoc DL(Op); 3918 SDValue Src = Op.getOperand(0); 3919 MVT EltVT = VT.getVectorElementType(); 3920 MVT SrcVT = Src.getSimpleValueType(); 3921 MVT SrcEltVT = SrcVT.getVectorElementType(); 3922 unsigned EltSize = EltVT.getSizeInBits(); 3923 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 3924 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 3925 "Unexpected vector element types"); 3926 3927 bool IsInt2FP = SrcEltVT.isInteger(); 3928 // Widening conversions 3929 if (EltSize > (2 * SrcEltSize)) { 3930 if (IsInt2FP) { 3931 // Do a regular integer sign/zero extension then convert to float. 3932 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2), 3933 VT.getVectorElementCount()); 3934 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 3935 ? ISD::ZERO_EXTEND 3936 : ISD::SIGN_EXTEND; 3937 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 3938 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 3939 } 3940 // FP2Int 3941 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 3942 // Do one doubling fp_extend then complete the operation by converting 3943 // to int. 3944 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 3945 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 3946 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 3947 } 3948 3949 // Narrowing conversions 3950 if (SrcEltSize > (2 * EltSize)) { 3951 if (IsInt2FP) { 3952 // One narrowing int_to_fp, then an fp_round. 3953 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 3954 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 3955 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 3956 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 3957 } 3958 // FP2Int 3959 // One narrowing fp_to_int, then truncate the integer. If the float isn't 3960 // representable by the integer, the result is poison. 
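// For example (illustrative), (v4i8 (fp_to_sint v4f64)) is lowered here as
// (v4i8 (truncate (v4i32 (fp_to_sint v4f64)))), and the truncate is then
// legalized separately.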
3961 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 3962 VT.getVectorElementCount()); 3963 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 3964 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 3965 } 3966 3967 // Scalable vectors can exit here. Patterns will handle equally-sized 3968 // conversions halving/doubling ones. 3969 if (!VT.isFixedLengthVector()) 3970 return Op; 3971 3972 // For fixed-length vectors we lower to a custom "VL" node. 3973 unsigned RVVOpc = 0; 3974 switch (Op.getOpcode()) { 3975 default: 3976 llvm_unreachable("Impossible opcode"); 3977 case ISD::FP_TO_SINT: 3978 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL; 3979 break; 3980 case ISD::FP_TO_UINT: 3981 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL; 3982 break; 3983 case ISD::SINT_TO_FP: 3984 RVVOpc = RISCVISD::SINT_TO_FP_VL; 3985 break; 3986 case ISD::UINT_TO_FP: 3987 RVVOpc = RISCVISD::UINT_TO_FP_VL; 3988 break; 3989 } 3990 3991 MVT ContainerVT = getContainerForFixedLengthVector(VT); 3992 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 3993 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() && 3994 "Expected same element count"); 3995 3996 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3997 3998 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 3999 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 4000 return convertFromScalableVector(VT, Src, DAG, Subtarget); 4001 } 4002 case ISD::FP_TO_SINT_SAT: 4003 case ISD::FP_TO_UINT_SAT: 4004 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget); 4005 case ISD::FTRUNC: 4006 case ISD::FCEIL: 4007 case ISD::FFLOOR: 4008 case ISD::FRINT: 4009 case ISD::FROUND: 4010 case ISD::FROUNDEVEN: 4011 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 4012 case ISD::VECREDUCE_ADD: 4013 case ISD::VECREDUCE_UMAX: 4014 case ISD::VECREDUCE_SMAX: 4015 case ISD::VECREDUCE_UMIN: 4016 case ISD::VECREDUCE_SMIN: 4017 return lowerVECREDUCE(Op, DAG); 4018 case ISD::VECREDUCE_AND: 4019 case ISD::VECREDUCE_OR: 4020 case ISD::VECREDUCE_XOR: 4021 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 4022 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); 4023 return lowerVECREDUCE(Op, DAG); 4024 case ISD::VECREDUCE_FADD: 4025 case ISD::VECREDUCE_SEQ_FADD: 4026 case ISD::VECREDUCE_FMIN: 4027 case ISD::VECREDUCE_FMAX: 4028 return lowerFPVECREDUCE(Op, DAG); 4029 case ISD::VP_REDUCE_ADD: 4030 case ISD::VP_REDUCE_UMAX: 4031 case ISD::VP_REDUCE_SMAX: 4032 case ISD::VP_REDUCE_UMIN: 4033 case ISD::VP_REDUCE_SMIN: 4034 case ISD::VP_REDUCE_FADD: 4035 case ISD::VP_REDUCE_SEQ_FADD: 4036 case ISD::VP_REDUCE_FMIN: 4037 case ISD::VP_REDUCE_FMAX: 4038 return lowerVPREDUCE(Op, DAG); 4039 case ISD::VP_REDUCE_AND: 4040 case ISD::VP_REDUCE_OR: 4041 case ISD::VP_REDUCE_XOR: 4042 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) 4043 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); 4044 return lowerVPREDUCE(Op, DAG); 4045 case ISD::INSERT_SUBVECTOR: 4046 return lowerINSERT_SUBVECTOR(Op, DAG); 4047 case ISD::EXTRACT_SUBVECTOR: 4048 return lowerEXTRACT_SUBVECTOR(Op, DAG); 4049 case ISD::STEP_VECTOR: 4050 return lowerSTEP_VECTOR(Op, DAG); 4051 case ISD::VECTOR_REVERSE: 4052 return lowerVECTOR_REVERSE(Op, DAG); 4053 case ISD::VECTOR_SPLICE: 4054 return lowerVECTOR_SPLICE(Op, DAG); 4055 case ISD::BUILD_VECTOR: 4056 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 4057 case ISD::SPLAT_VECTOR: 4058 if (Op.getValueType().getVectorElementType() == MVT::i1) 4059 
return lowerVectorMaskSplat(Op, DAG); 4060 return SDValue(); 4061 case ISD::VECTOR_SHUFFLE: 4062 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 4063 case ISD::CONCAT_VECTORS: { 4064 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 4065 // better than going through the stack, as the default expansion does. 4066 SDLoc DL(Op); 4067 MVT VT = Op.getSimpleValueType(); 4068 unsigned NumOpElts = 4069 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 4070 SDValue Vec = DAG.getUNDEF(VT); 4071 for (const auto &OpIdx : enumerate(Op->ops())) { 4072 SDValue SubVec = OpIdx.value(); 4073 // Don't insert undef subvectors. 4074 if (SubVec.isUndef()) 4075 continue; 4076 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec, 4077 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 4078 } 4079 return Vec; 4080 } 4081 case ISD::LOAD: 4082 if (auto V = expandUnalignedRVVLoad(Op, DAG)) 4083 return V; 4084 if (Op.getValueType().isFixedLengthVector()) 4085 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 4086 return Op; 4087 case ISD::STORE: 4088 if (auto V = expandUnalignedRVVStore(Op, DAG)) 4089 return V; 4090 if (Op.getOperand(1).getValueType().isFixedLengthVector()) 4091 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 4092 return Op; 4093 case ISD::MLOAD: 4094 case ISD::VP_LOAD: 4095 return lowerMaskedLoad(Op, DAG); 4096 case ISD::MSTORE: 4097 case ISD::VP_STORE: 4098 return lowerMaskedStore(Op, DAG); 4099 case ISD::SELECT_CC: { 4100 // This occurs because we custom legalize SETGT and SETUGT for setcc. That 4101 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand 4102 // into separate SETCC+SELECT_CC just like LegalizeDAG. 4103 SDValue Tmp1 = Op.getOperand(0); 4104 SDValue Tmp2 = Op.getOperand(1); 4105 SDValue True = Op.getOperand(2); 4106 SDValue False = Op.getOperand(3); 4107 EVT VT = Op.getValueType(); 4108 SDValue CC = Op.getOperand(4); 4109 EVT CmpVT = Tmp1.getValueType(); 4110 EVT CCVT = 4111 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); 4112 SDLoc DL(Op); 4113 SDValue Cond = 4114 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags()); 4115 return DAG.getSelect(DL, VT, Cond, True, False); 4116 } 4117 case ISD::SETCC: { 4118 MVT OpVT = Op.getOperand(0).getSimpleValueType(); 4119 if (OpVT.isScalarInteger()) { 4120 MVT VT = Op.getSimpleValueType(); 4121 SDValue LHS = Op.getOperand(0); 4122 SDValue RHS = Op.getOperand(1); 4123 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 4124 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) && 4125 "Unexpected CondCode"); 4126 4127 SDLoc DL(Op); 4128 4129 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can 4130 // convert this to the equivalent of (set(u)ge X, C+1) by using 4131 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant 4132 // in a register. 4133 if (isa<ConstantSDNode>(RHS)) { 4134 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue(); 4135 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) { 4136 // X > -1 should have been replaced with false. 4137 assert((CCVal != ISD::SETUGT || Imm != -1) && 4138 "Missing canonicalization"); 4139 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT. 
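// For example (illustrative), (setgt X, 5) is selected as
// (xori (slti X, 6), 1) and (setugt X, 5) as (xori (sltiu X, 6), 1).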
4140 CCVal = ISD::getSetCCSwappedOperands(CCVal); 4141 SDValue SetCC = DAG.getSetCC( 4142 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal); 4143 return DAG.getLogicalNOT(DL, SetCC, VT); 4144 } 4145 } 4146 4147 // Not a constant we could handle, swap the operands and condition code to 4148 // SETLT/SETULT. 4149 CCVal = ISD::getSetCCSwappedOperands(CCVal); 4150 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal); 4151 } 4152 4153 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 4154 } 4155 case ISD::ADD: 4156 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true); 4157 case ISD::SUB: 4158 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true); 4159 case ISD::MUL: 4160 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true); 4161 case ISD::MULHS: 4162 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL, /*HasMergeOp*/ true); 4163 case ISD::MULHU: 4164 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL, /*HasMergeOp*/ true); 4165 case ISD::AND: 4166 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 4167 RISCVISD::AND_VL); 4168 case ISD::OR: 4169 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 4170 RISCVISD::OR_VL); 4171 case ISD::XOR: 4172 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 4173 RISCVISD::XOR_VL); 4174 case ISD::SDIV: 4175 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true); 4176 case ISD::SREM: 4177 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true); 4178 case ISD::UDIV: 4179 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true); 4180 case ISD::UREM: 4181 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true); 4182 case ISD::SHL: 4183 case ISD::SRA: 4184 case ISD::SRL: 4185 if (Op.getSimpleValueType().isFixedLengthVector()) 4186 return lowerFixedLengthVectorShiftToRVV(Op, DAG); 4187 // This can be called for an i32 shift amount that needs to be promoted. 
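// For example (illustrative), on RV64 an (i64 shl X, (i32 Y)) can reach this
// point while the i32 amount is being promoted; returning SDValue() lets the
// generic legalizer extend Y to i64, after which the ordinary GPR shift
// patterns match.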
4188 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && 4189 "Unexpected custom legalisation"); 4190 return SDValue(); 4191 case ISD::SADDSAT: 4192 return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL, 4193 /*HasMergeOp*/ true); 4194 case ISD::UADDSAT: 4195 return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL, 4196 /*HasMergeOp*/ true); 4197 case ISD::SSUBSAT: 4198 return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL, 4199 /*HasMergeOp*/ true); 4200 case ISD::USUBSAT: 4201 return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL, 4202 /*HasMergeOp*/ true); 4203 case ISD::FADD: 4204 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true); 4205 case ISD::FSUB: 4206 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true); 4207 case ISD::FMUL: 4208 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true); 4209 case ISD::FDIV: 4210 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true); 4211 case ISD::FNEG: 4212 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 4213 case ISD::FABS: 4214 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 4215 case ISD::FSQRT: 4216 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 4217 case ISD::FMA: 4218 return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL); 4219 case ISD::SMIN: 4220 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true); 4221 case ISD::SMAX: 4222 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true); 4223 case ISD::UMIN: 4224 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true); 4225 case ISD::UMAX: 4226 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true); 4227 case ISD::FMINNUM: 4228 return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL, 4229 /*HasMergeOp*/ true); 4230 case ISD::FMAXNUM: 4231 return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL, 4232 /*HasMergeOp*/ true); 4233 case ISD::ABS: 4234 case ISD::VP_ABS: 4235 return lowerABS(Op, DAG); 4236 case ISD::CTLZ: 4237 case ISD::CTLZ_ZERO_UNDEF: 4238 case ISD::CTTZ_ZERO_UNDEF: 4239 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 4240 case ISD::VSELECT: 4241 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 4242 case ISD::FCOPYSIGN: 4243 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 4244 case ISD::MGATHER: 4245 case ISD::VP_GATHER: 4246 return lowerMaskedGather(Op, DAG); 4247 case ISD::MSCATTER: 4248 case ISD::VP_SCATTER: 4249 return lowerMaskedScatter(Op, DAG); 4250 case ISD::GET_ROUNDING: 4251 return lowerGET_ROUNDING(Op, DAG); 4252 case ISD::SET_ROUNDING: 4253 return lowerSET_ROUNDING(Op, DAG); 4254 case ISD::EH_DWARF_CFA: 4255 return lowerEH_DWARF_CFA(Op, DAG); 4256 case ISD::VP_SELECT: 4257 return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); 4258 case ISD::VP_MERGE: 4259 return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL); 4260 case ISD::VP_ADD: 4261 return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true); 4262 case ISD::VP_SUB: 4263 return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true); 4264 case ISD::VP_MUL: 4265 return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true); 4266 case ISD::VP_SDIV: 4267 return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true); 4268 case ISD::VP_UDIV: 4269 return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true); 4270 case ISD::VP_SREM: 4271 return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true); 4272 case ISD::VP_UREM: 4273 return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true); 4274 case 
ISD::VP_AND: 4275 return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL); 4276 case ISD::VP_OR: 4277 return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL); 4278 case ISD::VP_XOR: 4279 return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL); 4280 case ISD::VP_ASHR: 4281 return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true); 4282 case ISD::VP_LSHR: 4283 return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true); 4284 case ISD::VP_SHL: 4285 return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true); 4286 case ISD::VP_FADD: 4287 return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true); 4288 case ISD::VP_FSUB: 4289 return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true); 4290 case ISD::VP_FMUL: 4291 return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true); 4292 case ISD::VP_FDIV: 4293 return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true); 4294 case ISD::VP_FNEG: 4295 return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL); 4296 case ISD::VP_FABS: 4297 return lowerVPOp(Op, DAG, RISCVISD::FABS_VL); 4298 case ISD::VP_SQRT: 4299 return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL); 4300 case ISD::VP_FMA: 4301 return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL); 4302 case ISD::VP_FMINNUM: 4303 return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true); 4304 case ISD::VP_FMAXNUM: 4305 return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true); 4306 case ISD::VP_FCOPYSIGN: 4307 return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true); 4308 case ISD::VP_SIGN_EXTEND: 4309 case ISD::VP_ZERO_EXTEND: 4310 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 4311 return lowerVPExtMaskOp(Op, DAG); 4312 return lowerVPOp(Op, DAG, 4313 Op.getOpcode() == ISD::VP_SIGN_EXTEND 4314 ? 
RISCVISD::VSEXT_VL 4315 : RISCVISD::VZEXT_VL); 4316 case ISD::VP_TRUNCATE: 4317 return lowerVectorTruncLike(Op, DAG); 4318 case ISD::VP_FP_EXTEND: 4319 case ISD::VP_FP_ROUND: 4320 return lowerVectorFPExtendOrRoundLike(Op, DAG); 4321 case ISD::VP_FP_TO_SINT: 4322 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL); 4323 case ISD::VP_FP_TO_UINT: 4324 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL); 4325 case ISD::VP_SINT_TO_FP: 4326 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL); 4327 case ISD::VP_UINT_TO_FP: 4328 return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL); 4329 case ISD::VP_SETCC: 4330 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) 4331 return lowerVPSetCCMaskOp(Op, DAG); 4332 return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true); 4333 case ISD::VP_SMIN: 4334 return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true); 4335 case ISD::VP_SMAX: 4336 return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true); 4337 case ISD::VP_UMIN: 4338 return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true); 4339 case ISD::VP_UMAX: 4340 return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true); 4341 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: 4342 return lowerVPStridedLoad(Op, DAG); 4343 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: 4344 return lowerVPStridedStore(Op, DAG); 4345 case ISD::VP_FCEIL: 4346 case ISD::VP_FFLOOR: 4347 case ISD::VP_FRINT: 4348 case ISD::VP_FNEARBYINT: 4349 case ISD::VP_FROUND: 4350 case ISD::VP_FROUNDEVEN: 4351 case ISD::VP_FROUNDTOZERO: 4352 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); 4353 } 4354 } 4355 4356 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 4357 SelectionDAG &DAG, unsigned Flags) { 4358 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 4359 } 4360 4361 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 4362 SelectionDAG &DAG, unsigned Flags) { 4363 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 4364 Flags); 4365 } 4366 4367 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 4368 SelectionDAG &DAG, unsigned Flags) { 4369 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 4370 N->getOffset(), Flags); 4371 } 4372 4373 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 4374 SelectionDAG &DAG, unsigned Flags) { 4375 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 4376 } 4377 4378 template <class NodeTy> 4379 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 4380 bool IsLocal) const { 4381 SDLoc DL(N); 4382 EVT Ty = getPointerTy(DAG.getDataLayout()); 4383 4384 // When HWASAN is used and tagging of global variables is enabled 4385 // they should be accessed via the GOT, since the tagged address of a global 4386 // is incompatible with existing code models. This also applies to non-pic 4387 // mode. 4388 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) { 4389 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 4390 if (IsLocal && !Subtarget.allowTaggedGlobals()) 4391 // Use PC-relative addressing to access the symbol. This generates the 4392 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 4393 // %pcrel_lo(auipc)). 4394 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 4395 4396 // Use PC-relative addressing to access the GOT for this symbol, then load 4397 // the address from the GOT. 
This generates the pattern (PseudoLA sym), 4398 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 4399 MachineFunction &MF = DAG.getMachineFunction(); 4400 MachineMemOperand *MemOp = MF.getMachineMemOperand( 4401 MachinePointerInfo::getGOT(MF), 4402 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 4403 MachineMemOperand::MOInvariant, 4404 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 4405 SDValue Load = 4406 DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other), 4407 {DAG.getEntryNode(), Addr}, Ty, MemOp); 4408 return Load; 4409 } 4410 4411 switch (getTargetMachine().getCodeModel()) { 4412 default: 4413 report_fatal_error("Unsupported code model for lowering"); 4414 case CodeModel::Small: { 4415 // Generate a sequence for accessing addresses within the first 2 GiB of 4416 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 4417 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 4418 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 4419 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 4420 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo); 4421 } 4422 case CodeModel::Medium: { 4423 // Generate a sequence for accessing addresses within any 2GiB range within 4424 // the address space. This generates the pattern (PseudoLLA sym), which 4425 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 4426 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 4427 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr); 4428 } 4429 } 4430 } 4431 4432 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 4433 SelectionDAG &DAG) const { 4434 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 4435 assert(N->getOffset() == 0 && "unexpected offset in global node"); 4436 return getAddr(N, DAG, N->getGlobal()->isDSOLocal()); 4437 } 4438 4439 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 4440 SelectionDAG &DAG) const { 4441 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 4442 4443 return getAddr(N, DAG); 4444 } 4445 4446 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 4447 SelectionDAG &DAG) const { 4448 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 4449 4450 return getAddr(N, DAG); 4451 } 4452 4453 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 4454 SelectionDAG &DAG) const { 4455 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 4456 4457 return getAddr(N, DAG); 4458 } 4459 4460 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 4461 SelectionDAG &DAG, 4462 bool UseGOT) const { 4463 SDLoc DL(N); 4464 EVT Ty = getPointerTy(DAG.getDataLayout()); 4465 const GlobalValue *GV = N->getGlobal(); 4466 MVT XLenVT = Subtarget.getXLenVT(); 4467 4468 if (UseGOT) { 4469 // Use PC-relative addressing to access the GOT for this TLS symbol, then 4470 // load the address from the GOT and add the thread pointer. This generates 4471 // the pattern (PseudoLA_TLS_IE sym), which expands to 4472 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 
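// Illustrative final code for the initial-exec case (register and label names
// are arbitrary; ld becomes lw on RV32):
//   .Lie_hi: auipc a0, %tls_ie_pcrel_hi(sym)
//            ld    a0, %pcrel_lo(.Lie_hi)(a0)
//            add   a0, a0, tp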
4473 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 4474 MachineFunction &MF = DAG.getMachineFunction(); 4475 MachineMemOperand *MemOp = MF.getMachineMemOperand( 4476 MachinePointerInfo::getGOT(MF), 4477 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | 4478 MachineMemOperand::MOInvariant, 4479 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8)); 4480 SDValue Load = DAG.getMemIntrinsicNode( 4481 RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other), 4482 {DAG.getEntryNode(), Addr}, Ty, MemOp); 4483 4484 // Add the thread pointer. 4485 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 4486 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 4487 } 4488 4489 // Generate a sequence for accessing the address relative to the thread 4490 // pointer, with the appropriate adjustment for the thread pointer offset. 4491 // This generates the pattern 4492 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 4493 SDValue AddrHi = 4494 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 4495 SDValue AddrAdd = 4496 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 4497 SDValue AddrLo = 4498 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 4499 4500 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); 4501 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 4502 SDValue MNAdd = 4503 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd); 4504 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo); 4505 } 4506 4507 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 4508 SelectionDAG &DAG) const { 4509 SDLoc DL(N); 4510 EVT Ty = getPointerTy(DAG.getDataLayout()); 4511 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 4512 const GlobalValue *GV = N->getGlobal(); 4513 4514 // Use a PC-relative addressing mode to access the global dynamic GOT address. 4515 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 4516 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 4517 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 4518 SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr); 4519 4520 // Prepare argument list to generate call. 4521 ArgListTy Args; 4522 ArgListEntry Entry; 4523 Entry.Node = Load; 4524 Entry.Ty = CallTy; 4525 Args.push_back(Entry); 4526 4527 // Setup call to __tls_get_addr. 
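// As a sketch of the eventual output (general-dynamic model; the exact call
// sequence is decided by call lowering and the linker):
//   la.tls.gd a0, sym        # auipc + addi via %tls_gd_pcrel_hi/%pcrel_lo
//   call      __tls_get_addr@plt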
4528 TargetLowering::CallLoweringInfo CLI(DAG); 4529 CLI.setDebugLoc(DL) 4530 .setChain(DAG.getEntryNode()) 4531 .setLibCallee(CallingConv::C, CallTy, 4532 DAG.getExternalSymbol("__tls_get_addr", Ty), 4533 std::move(Args)); 4534 4535 return LowerCallTo(CLI).first; 4536 } 4537 4538 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 4539 SelectionDAG &DAG) const { 4540 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 4541 assert(N->getOffset() == 0 && "unexpected offset in global node"); 4542 4543 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 4544 4545 if (DAG.getMachineFunction().getFunction().getCallingConv() == 4546 CallingConv::GHC) 4547 report_fatal_error("In GHC calling convention TLS is not supported"); 4548 4549 SDValue Addr; 4550 switch (Model) { 4551 case TLSModel::LocalExec: 4552 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 4553 break; 4554 case TLSModel::InitialExec: 4555 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 4556 break; 4557 case TLSModel::LocalDynamic: 4558 case TLSModel::GeneralDynamic: 4559 Addr = getDynamicTLSAddr(N, DAG); 4560 break; 4561 } 4562 4563 return Addr; 4564 } 4565 4566 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 4567 SDValue CondV = Op.getOperand(0); 4568 SDValue TrueV = Op.getOperand(1); 4569 SDValue FalseV = Op.getOperand(2); 4570 SDLoc DL(Op); 4571 MVT VT = Op.getSimpleValueType(); 4572 MVT XLenVT = Subtarget.getXLenVT(); 4573 4574 // Lower vector SELECTs to VSELECTs by splatting the condition. 4575 if (VT.isVector()) { 4576 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); 4577 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV); 4578 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); 4579 } 4580 4581 if (!Subtarget.hasShortForwardBranchOpt()) { 4582 // (select c, -1, y) -> -c | y 4583 if (isAllOnesConstant(TrueV)) { 4584 SDValue Neg = DAG.getNegative(CondV, DL, VT); 4585 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 4586 } 4587 // (select c, y, -1) -> (c-1) | y 4588 if (isAllOnesConstant(FalseV)) { 4589 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 4590 DAG.getAllOnesConstant(DL, VT)); 4591 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 4592 } 4593 4594 // (select c, 0, y) -> (c-1) & y 4595 if (isNullConstant(TrueV)) { 4596 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, 4597 DAG.getAllOnesConstant(DL, VT)); 4598 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 4599 } 4600 // (select c, y, 0) -> -c & y 4601 if (isNullConstant(FalseV)) { 4602 SDValue Neg = DAG.getNegative(CondV, DL, VT); 4603 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 4604 } 4605 } 4606 4607 // If the condition is not an integer SETCC which operates on XLenVT, we need 4608 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.: 4609 // (select condv, truev, falsev) 4610 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 4611 if (CondV.getOpcode() != ISD::SETCC || 4612 CondV.getOperand(0).getSimpleValueType() != XLenVT) { 4613 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 4614 SDValue SetNE = DAG.getCondCode(ISD::SETNE); 4615 4616 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 4617 4618 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 4619 } 4620 4621 // If the CondV is the output of a SETCC node which operates on XLenVT inputs, 4622 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take 4623 // advantage of the integer compare+branch instructions. 
i.e.: 4624 // (select (setcc lhs, rhs, cc), truev, falsev) 4625 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 4626 SDValue LHS = CondV.getOperand(0); 4627 SDValue RHS = CondV.getOperand(1); 4628 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 4629 4630 // Special case for a select of 2 constants that have a difference of 1. 4631 // Normally this is done by DAGCombine, but if the select is introduced by 4632 // type legalization or op legalization, we miss it. Restricting to SETLT 4633 // case for now because that is what signed saturating add/sub need. 4634 // FIXME: We don't need the condition to be SETLT or even a SETCC, 4635 // but we would probably want to swap the true/false values if the condition 4636 // is SETGE/SETLE to avoid an XORI. 4637 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && 4638 CCVal == ISD::SETLT) { 4639 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue(); 4640 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue(); 4641 if (TrueVal - 1 == FalseVal) 4642 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); 4643 if (TrueVal + 1 == FalseVal) 4644 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV); 4645 } 4646 4647 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 4648 // 1 < x ? x : 1 -> 0 < x ? x : 1 4649 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) && 4650 RHS == TrueV && LHS == FalseV) { 4651 LHS = DAG.getConstant(0, DL, VT); 4652 // 0 <u x is the same as x != 0. 4653 if (CCVal == ISD::SETULT) { 4654 std::swap(LHS, RHS); 4655 CCVal = ISD::SETNE; 4656 } 4657 } 4658 4659 // x <s -1 ? x : -1 -> x <s 0 ? x : -1 4660 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV && 4661 RHS == FalseV) { 4662 RHS = DAG.getConstant(0, DL, VT); 4663 } 4664 4665 SDValue TargetCC = DAG.getCondCode(CCVal); 4666 4667 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) { 4668 // (select (setcc lhs, rhs, CC), constant, falsev) 4669 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant) 4670 std::swap(TrueV, FalseV); 4671 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType())); 4672 } 4673 4674 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 4675 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops); 4676 } 4677 4678 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 4679 SDValue CondV = Op.getOperand(1); 4680 SDLoc DL(Op); 4681 MVT XLenVT = Subtarget.getXLenVT(); 4682 4683 if (CondV.getOpcode() == ISD::SETCC && 4684 CondV.getOperand(0).getValueType() == XLenVT) { 4685 SDValue LHS = CondV.getOperand(0); 4686 SDValue RHS = CondV.getOperand(1); 4687 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 4688 4689 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 4690 4691 SDValue TargetCC = DAG.getCondCode(CCVal); 4692 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 4693 LHS, RHS, TargetCC, Op.getOperand(2)); 4694 } 4695 4696 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 4697 CondV, DAG.getConstant(0, DL, XLenVT), 4698 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 4699 } 4700 4701 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 4702 MachineFunction &MF = DAG.getMachineFunction(); 4703 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 4704 4705 SDLoc DL(Op); 4706 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 4707 getPointerTy(MF.getDataLayout()));
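// VarArgsFrameIndex is recorded when the formal arguments are lowered;
// broadly speaking it points at the save area for the unnamed arguments (see
// LowerFormalArguments for the authoritative layout).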
4708 4709 // vastart just stores the address of the VarArgsFrameIndex slot into the 4710 // memory location argument. 4711 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 4712 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 4713 MachinePointerInfo(SV)); 4714 } 4715 4716 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 4717 SelectionDAG &DAG) const { 4718 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 4719 MachineFunction &MF = DAG.getMachineFunction(); 4720 MachineFrameInfo &MFI = MF.getFrameInfo(); 4721 MFI.setFrameAddressIsTaken(true); 4722 Register FrameReg = RI.getFrameRegister(MF); 4723 int XLenInBytes = Subtarget.getXLen() / 8; 4724 4725 EVT VT = Op.getValueType(); 4726 SDLoc DL(Op); 4727 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 4728 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4729 while (Depth--) { 4730 int Offset = -(XLenInBytes * 2); 4731 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 4732 DAG.getIntPtrConstant(Offset, DL)); 4733 FrameAddr = 4734 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 4735 } 4736 return FrameAddr; 4737 } 4738 4739 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 4740 SelectionDAG &DAG) const { 4741 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 4742 MachineFunction &MF = DAG.getMachineFunction(); 4743 MachineFrameInfo &MFI = MF.getFrameInfo(); 4744 MFI.setReturnAddressIsTaken(true); 4745 MVT XLenVT = Subtarget.getXLenVT(); 4746 int XLenInBytes = Subtarget.getXLen() / 8; 4747 4748 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 4749 return SDValue(); 4750 4751 EVT VT = Op.getValueType(); 4752 SDLoc DL(Op); 4753 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4754 if (Depth) { 4755 int Off = -XLenInBytes; 4756 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 4757 SDValue Offset = DAG.getConstant(Off, DL, VT); 4758 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 4759 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 4760 MachinePointerInfo()); 4761 } 4762 4763 // Return the value of the return address register, marking it an implicit 4764 // live-in. 
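// For depth 0 this is simply a copy out of the return address register
// (ra/x1).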
4765 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 4766 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 4767 } 4768 4769 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 4770 SelectionDAG &DAG) const { 4771 SDLoc DL(Op); 4772 SDValue Lo = Op.getOperand(0); 4773 SDValue Hi = Op.getOperand(1); 4774 SDValue Shamt = Op.getOperand(2); 4775 EVT VT = Lo.getValueType(); 4776 4777 // if Shamt-XLEN < 0: // Shamt < XLEN 4778 // Lo = Lo << Shamt 4779 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt)) 4780 // else: 4781 // Lo = 0 4782 // Hi = Lo << (Shamt-XLEN) 4783 4784 SDValue Zero = DAG.getConstant(0, DL, VT); 4785 SDValue One = DAG.getConstant(1, DL, VT); 4786 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 4787 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 4788 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 4789 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 4790 4791 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 4792 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 4793 SDValue ShiftRightLo = 4794 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 4795 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 4796 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 4797 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 4798 4799 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 4800 4801 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 4802 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 4803 4804 SDValue Parts[2] = {Lo, Hi}; 4805 return DAG.getMergeValues(Parts, DL); 4806 } 4807 4808 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 4809 bool IsSRA) const { 4810 SDLoc DL(Op); 4811 SDValue Lo = Op.getOperand(0); 4812 SDValue Hi = Op.getOperand(1); 4813 SDValue Shamt = Op.getOperand(2); 4814 EVT VT = Lo.getValueType(); 4815 4816 // SRA expansion: 4817 // if Shamt-XLEN < 0: // Shamt < XLEN 4818 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) 4819 // Hi = Hi >>s Shamt 4820 // else: 4821 // Lo = Hi >>s (Shamt-XLEN); 4822 // Hi = Hi >>s (XLEN-1) 4823 // 4824 // SRL expansion: 4825 // if Shamt-XLEN < 0: // Shamt < XLEN 4826 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1)) 4827 // Hi = Hi >>u Shamt 4828 // else: 4829 // Lo = Hi >>u (Shamt-XLEN); 4830 // Hi = 0; 4831 4832 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 4833 4834 SDValue Zero = DAG.getConstant(0, DL, VT); 4835 SDValue One = DAG.getConstant(1, DL, VT); 4836 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 4837 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 4838 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 4839 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 4840 4841 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 4842 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 4843 SDValue ShiftLeftHi = 4844 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 4845 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 4846 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 4847 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 4848 SDValue HiFalse = 4849 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 4850 4851 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 4852 4853 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 4854 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 4855 4856 SDValue Parts[2] = {Lo, Hi}; 4857 return DAG.getMergeValues(Parts, DL); 4858 } 4859 4860 // Lower splats of i1 types to SETCC. For each mask vector type, we have a 4861 // legal equivalently-sized i8 type, so we can use that as a go-between. 4862 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, 4863 SelectionDAG &DAG) const { 4864 SDLoc DL(Op); 4865 MVT VT = Op.getSimpleValueType(); 4866 SDValue SplatVal = Op.getOperand(0); 4867 // All-zeros or all-ones splats are handled specially. 4868 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) { 4869 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 4870 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL); 4871 } 4872 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) { 4873 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 4874 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); 4875 } 4876 MVT XLenVT = Subtarget.getXLenVT(); 4877 assert(SplatVal.getValueType() == XLenVT && 4878 "Unexpected type for i1 splat value"); 4879 MVT InterVT = VT.changeVectorElementType(MVT::i8); 4880 SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal, 4881 DAG.getConstant(1, DL, XLenVT)); 4882 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); 4883 SDValue Zero = DAG.getConstant(0, DL, InterVT); 4884 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); 4885 } 4886 4887 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 4888 // illegal (currently only vXi64 RV32). 4889 // FIXME: We could also catch non-constant sign-extended i32 values and lower 4890 // them to VMV_V_X_VL. 4891 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 4892 SelectionDAG &DAG) const { 4893 SDLoc DL(Op); 4894 MVT VecVT = Op.getSimpleValueType(); 4895 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 4896 "Unexpected SPLAT_VECTOR_PARTS lowering"); 4897 4898 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 4899 SDValue Lo = Op.getOperand(0); 4900 SDValue Hi = Op.getOperand(1); 4901 4902 if (VecVT.isFixedLengthVector()) { 4903 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 4904 SDLoc DL(Op); 4905 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 4906 4907 SDValue Res = 4908 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG); 4909 return convertFromScalableVector(VecVT, Res, DAG, Subtarget); 4910 } 4911 4912 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 4913 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 4914 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 4915 // If Hi constant is all the same sign bit as Lo, lower this as a custom 4916 // node in order to try and match RVV vector/scalar instructions. 4917 if ((LoC >> 31) == HiC) 4918 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), 4919 Lo, DAG.getRegister(RISCV::X0, MVT::i32)); 4920 } 4921 4922 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 
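// e.g. (splat_vector_parts X, (sra X, 31)) is really a splat of the
// sign-extended 32-bit value X, so the single vmv.v.x form below applies.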
4923 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && 4924 isa<ConstantSDNode>(Hi.getOperand(1)) && 4925 Hi.getConstantOperandVal(1) == 31) 4926 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo, 4927 DAG.getRegister(RISCV::X0, MVT::i32)); 4928 4929 // Fall back to use a stack store and stride x0 vector load. Use X0 as VL. 4930 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, 4931 DAG.getUNDEF(VecVT), Lo, Hi, 4932 DAG.getRegister(RISCV::X0, MVT::i32)); 4933 } 4934 4935 // Custom-lower extensions from mask vectors by using a vselect either with 1 4936 // for zero/any-extension or -1 for sign-extension: 4937 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 4938 // Note that any-extension is lowered identically to zero-extension. 4939 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 4940 int64_t ExtTrueVal) const { 4941 SDLoc DL(Op); 4942 MVT VecVT = Op.getSimpleValueType(); 4943 SDValue Src = Op.getOperand(0); 4944 // Only custom-lower extensions from mask types 4945 assert(Src.getValueType().isVector() && 4946 Src.getValueType().getVectorElementType() == MVT::i1); 4947 4948 if (VecVT.isScalableVector()) { 4949 SDValue SplatZero = DAG.getConstant(0, DL, VecVT); 4950 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT); 4951 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 4952 } 4953 4954 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 4955 MVT I1ContainerVT = 4956 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 4957 4958 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 4959 4960 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second; 4961 4962 MVT XLenVT = Subtarget.getXLenVT(); 4963 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 4964 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 4965 4966 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 4967 DAG.getUNDEF(ContainerVT), SplatZero, VL); 4968 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 4969 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); 4970 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, 4971 SplatTrueVal, SplatZero, VL); 4972 4973 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 4974 } 4975 4976 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 4977 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 4978 MVT ExtVT = Op.getSimpleValueType(); 4979 // Only custom-lower extensions from fixed-length vector types. 4980 if (!ExtVT.isFixedLengthVector()) 4981 return Op; 4982 MVT VT = Op.getOperand(0).getSimpleValueType(); 4983 // Grab the canonical container type for the extended type. Infer the smaller 4984 // type from that to ensure the same number of vector elements, as we know 4985 // the LMUL will be sufficient to hold the smaller type. 4986 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 4987 // Get the extended container type manually to ensure the same number of 4988 // vector elements between source and dest. 
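// Illustrative example (the actual containers depend on the subtarget's
// VLEN): if the extended type's container is nxv2i32 and the source elements
// are i16, the source is placed in nxv2i16 rather than whatever default
// container its fixed-length type would otherwise map to.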
4989 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 4990 ContainerExtVT.getVectorElementCount()); 4991 4992 SDValue Op1 = 4993 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 4994 4995 SDLoc DL(Op); 4996 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 4997 4998 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 4999 5000 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 5001 } 5002 5003 // Custom-lower truncations from vectors to mask vectors by using a mask and a 5004 // setcc operation: 5005 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 5006 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op, 5007 SelectionDAG &DAG) const { 5008 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 5009 SDLoc DL(Op); 5010 EVT MaskVT = Op.getValueType(); 5011 // Only expect to custom-lower truncations to mask types 5012 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 5013 "Unexpected type for vector mask lowering"); 5014 SDValue Src = Op.getOperand(0); 5015 MVT VecVT = Src.getSimpleValueType(); 5016 SDValue Mask, VL; 5017 if (IsVPTrunc) { 5018 Mask = Op.getOperand(1); 5019 VL = Op.getOperand(2); 5020 } 5021 // If this is a fixed vector, we need to convert it to a scalable vector. 5022 MVT ContainerVT = VecVT; 5023 5024 if (VecVT.isFixedLengthVector()) { 5025 ContainerVT = getContainerForFixedLengthVector(VecVT); 5026 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 5027 if (IsVPTrunc) { 5028 MVT MaskContainerVT = 5029 getContainerForFixedLengthVector(Mask.getSimpleValueType()); 5030 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); 5031 } 5032 } 5033 5034 if (!IsVPTrunc) { 5035 std::tie(Mask, VL) = 5036 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 5037 } 5038 5039 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 5040 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 5041 5042 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 5043 DAG.getUNDEF(ContainerVT), SplatOne, VL); 5044 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 5045 DAG.getUNDEF(ContainerVT), SplatZero, VL); 5046 5047 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 5048 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, 5049 DAG.getUNDEF(ContainerVT), Mask, VL); 5050 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, 5051 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE), 5052 DAG.getUNDEF(MaskContainerVT), Mask, VL}); 5053 if (MaskVT.isFixedLengthVector()) 5054 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 5055 return Trunc; 5056 } 5057 5058 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op, 5059 SelectionDAG &DAG) const { 5060 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE; 5061 SDLoc DL(Op); 5062 5063 MVT VT = Op.getSimpleValueType(); 5064 // Only custom-lower vector truncates 5065 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 5066 5067 // Truncates to mask types are handled differently 5068 if (VT.getVectorElementType() == MVT::i1) 5069 return lowerVectorMaskTruncLike(Op, DAG); 5070 5071 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 5072 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which 5073 // truncate by one power of two at a time. 
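// For example, an i64->i8 truncate becomes three narrowing steps,
// i64->i32->i16->i8, each of which typically selects to a vnsrl.wi with a
// zero shift amount.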
5074 MVT DstEltVT = VT.getVectorElementType(); 5075 5076 SDValue Src = Op.getOperand(0); 5077 MVT SrcVT = Src.getSimpleValueType(); 5078 MVT SrcEltVT = SrcVT.getVectorElementType(); 5079 5080 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) && 5081 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 5082 "Unexpected vector truncate lowering"); 5083 5084 MVT ContainerVT = SrcVT; 5085 SDValue Mask, VL; 5086 if (IsVPTrunc) { 5087 Mask = Op.getOperand(1); 5088 VL = Op.getOperand(2); 5089 } 5090 if (SrcVT.isFixedLengthVector()) { 5091 ContainerVT = getContainerForFixedLengthVector(SrcVT); 5092 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 5093 if (IsVPTrunc) { 5094 MVT MaskVT = getMaskTypeFor(ContainerVT); 5095 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5096 } 5097 } 5098 5099 SDValue Result = Src; 5100 if (!IsVPTrunc) { 5101 std::tie(Mask, VL) = 5102 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 5103 } 5104 5105 LLVMContext &Context = *DAG.getContext(); 5106 const ElementCount Count = ContainerVT.getVectorElementCount(); 5107 do { 5108 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 5109 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 5110 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 5111 Mask, VL); 5112 } while (SrcEltVT != DstEltVT); 5113 5114 if (SrcVT.isFixedLengthVector()) 5115 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 5116 5117 return Result; 5118 } 5119 5120 SDValue 5121 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op, 5122 SelectionDAG &DAG) const { 5123 bool IsVP = 5124 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND; 5125 bool IsExtend = 5126 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND; 5127 // RVV can only do truncate fp to types half the size as the source. We 5128 // custom-lower f64->f16 rounds via RVV's round-to-odd float 5129 // conversion instruction. 5130 SDLoc DL(Op); 5131 MVT VT = Op.getSimpleValueType(); 5132 5133 assert(VT.isVector() && "Unexpected type for vector truncate lowering"); 5134 5135 SDValue Src = Op.getOperand(0); 5136 MVT SrcVT = Src.getSimpleValueType(); 5137 5138 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 || 5139 SrcVT.getVectorElementType() != MVT::f16); 5140 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 || 5141 SrcVT.getVectorElementType() != MVT::f64); 5142 5143 bool IsDirectConv = IsDirectExtend || IsDirectTrunc; 5144 5145 // Prepare any fixed-length vector operands. 5146 MVT ContainerVT = VT; 5147 SDValue Mask, VL; 5148 if (IsVP) { 5149 Mask = Op.getOperand(1); 5150 VL = Op.getOperand(2); 5151 } 5152 if (VT.isFixedLengthVector()) { 5153 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 5154 ContainerVT = 5155 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 5156 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 5157 if (IsVP) { 5158 MVT MaskVT = getMaskTypeFor(ContainerVT); 5159 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5160 } 5161 } 5162 5163 if (!IsVP) 5164 std::tie(Mask, VL) = 5165 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 5166 5167 unsigned ConvOpc = IsExtend ? 
RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL; 5168 5169 if (IsDirectConv) { 5170 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL); 5171 if (VT.isFixedLengthVector()) 5172 Src = convertFromScalableVector(VT, Src, DAG, Subtarget); 5173 return Src; 5174 } 5175 5176 unsigned InterConvOpc = 5177 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL; 5178 5179 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 5180 SDValue IntermediateConv = 5181 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL); 5182 SDValue Result = 5183 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL); 5184 if (VT.isFixedLengthVector()) 5185 return convertFromScalableVector(VT, Result, DAG, Subtarget); 5186 return Result; 5187 } 5188 5189 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 5190 // first position of a vector, and that vector is slid up to the insert index. 5191 // By limiting the active vector length to index+1 and merging with the 5192 // original vector (with an undisturbed tail policy for elements >= VL), we 5193 // achieve the desired result of leaving all elements untouched except the one 5194 // at VL-1, which is replaced with the desired value. 5195 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 5196 SelectionDAG &DAG) const { 5197 SDLoc DL(Op); 5198 MVT VecVT = Op.getSimpleValueType(); 5199 SDValue Vec = Op.getOperand(0); 5200 SDValue Val = Op.getOperand(1); 5201 SDValue Idx = Op.getOperand(2); 5202 5203 if (VecVT.getVectorElementType() == MVT::i1) { 5204 // FIXME: For now we just promote to an i8 vector and insert into that, 5205 // but this is probably not optimal. 5206 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 5207 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 5208 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx); 5209 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec); 5210 } 5211 5212 MVT ContainerVT = VecVT; 5213 // If the operand is a fixed-length vector, convert to a scalable one. 5214 if (VecVT.isFixedLengthVector()) { 5215 ContainerVT = getContainerForFixedLengthVector(VecVT); 5216 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 5217 } 5218 5219 MVT XLenVT = Subtarget.getXLenVT(); 5220 5221 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 5222 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 5223 // Even i64-element vectors on RV32 can be lowered without scalar 5224 // legalization if the most-significant 32 bits of the value are not affected 5225 // by the sign-extension of the lower 32 bits. 5226 // TODO: We could also catch sign extensions of a 32-bit value. 5227 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 5228 const auto *CVal = cast<ConstantSDNode>(Val); 5229 if (isInt<32>(CVal->getSExtValue())) { 5230 IsLegalInsert = true; 5231 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 5232 } 5233 } 5234 5235 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 5236 5237 SDValue ValInVec; 5238 5239 if (IsLegalInsert) { 5240 unsigned Opc = 5241 VecVT.isFloatingPoint() ? 
RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; 5242 if (isNullConstant(Idx)) { 5243 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); 5244 if (!VecVT.isFixedLengthVector()) 5245 return Vec; 5246 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); 5247 } 5248 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget); 5249 } else { 5250 // On RV32, i64-element vectors must be specially handled to place the 5251 // value at element 0, by using two vslide1down instructions in sequence on 5252 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for 5253 // this. 5254 SDValue One = DAG.getConstant(1, DL, XLenVT); 5255 SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero); 5256 SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One); 5257 MVT I32ContainerVT = 5258 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 5259 SDValue I32Mask = 5260 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 5261 // Limit the active VL to two. 5262 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 5263 // If the Idx is 0 we can insert directly into the vector. 5264 if (isNullConstant(Idx)) { 5265 // First slide in the lo value, then the hi in above it. We use slide1down 5266 // to avoid the register group overlap constraint of vslide1up. 5267 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 5268 Vec, Vec, ValLo, I32Mask, InsertI64VL); 5269 // If the source vector is undef don't pass along the tail elements from 5270 // the previous slide1down. 5271 SDValue Tail = Vec.isUndef() ? Vec : ValInVec; 5272 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 5273 Tail, ValInVec, ValHi, I32Mask, InsertI64VL); 5274 // Bitcast back to the right container type. 5275 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 5276 5277 if (!VecVT.isFixedLengthVector()) 5278 return ValInVec; 5279 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); 5280 } 5281 5282 // First slide in the lo value, then the hi in above it. We use slide1down 5283 // to avoid the register group overlap constraint of vslide1up. 5284 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 5285 DAG.getUNDEF(I32ContainerVT), 5286 DAG.getUNDEF(I32ContainerVT), ValLo, 5287 I32Mask, InsertI64VL); 5288 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT, 5289 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi, 5290 I32Mask, InsertI64VL); 5291 // Bitcast back to the right container type. 5292 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 5293 } 5294 5295 // Now that the value is in a vector, slide it into position. 5296 SDValue InsertVL = 5297 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 5298 5299 // Use tail agnostic policy if Idx is the last index of Vec. 5300 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; 5301 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) && 5302 cast<ConstantSDNode>(Idx)->getZExtValue() + 1 == 5303 VecVT.getVectorNumElements()) 5304 Policy = RISCVII::TAIL_AGNOSTIC; 5305 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, 5306 Idx, Mask, InsertVL, Policy); 5307 if (!VecVT.isFixedLengthVector()) 5308 return Slideup; 5309 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 5310 } 5311 5312 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 5313 // extract the first element: (extractelt (slidedown vec, idx), 0). 
For integer 5314 // types this is done using VMV_X_S to allow us to glean information about the 5315 // sign bits of the result. 5316 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 5317 SelectionDAG &DAG) const { 5318 SDLoc DL(Op); 5319 SDValue Idx = Op.getOperand(1); 5320 SDValue Vec = Op.getOperand(0); 5321 EVT EltVT = Op.getValueType(); 5322 MVT VecVT = Vec.getSimpleValueType(); 5323 MVT XLenVT = Subtarget.getXLenVT(); 5324 5325 if (VecVT.getVectorElementType() == MVT::i1) { 5326 // Use vfirst.m to extract the first bit. 5327 if (isNullConstant(Idx)) { 5328 MVT ContainerVT = VecVT; 5329 if (VecVT.isFixedLengthVector()) { 5330 ContainerVT = getContainerForFixedLengthVector(VecVT); 5331 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 5332 } 5333 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 5334 SDValue Vfirst = 5335 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL); 5336 return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT), 5337 ISD::SETEQ); 5338 } 5339 if (VecVT.isFixedLengthVector()) { 5340 unsigned NumElts = VecVT.getVectorNumElements(); 5341 if (NumElts >= 8) { 5342 MVT WideEltVT; 5343 unsigned WidenVecLen; 5344 SDValue ExtractElementIdx; 5345 SDValue ExtractBitIdx; 5346 unsigned MaxEEW = Subtarget.getELEN(); 5347 MVT LargestEltVT = MVT::getIntegerVT( 5348 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits()))); 5349 if (NumElts <= LargestEltVT.getSizeInBits()) { 5350 assert(isPowerOf2_32(NumElts) && 5351 "the number of elements should be power of 2"); 5352 WideEltVT = MVT::getIntegerVT(NumElts); 5353 WidenVecLen = 1; 5354 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT); 5355 ExtractBitIdx = Idx; 5356 } else { 5357 WideEltVT = LargestEltVT; 5358 WidenVecLen = NumElts / WideEltVT.getSizeInBits(); 5359 // extract element index = index / element width 5360 ExtractElementIdx = DAG.getNode( 5361 ISD::SRL, DL, XLenVT, Idx, 5362 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT)); 5363 // mask bit index = index % element width 5364 ExtractBitIdx = DAG.getNode( 5365 ISD::AND, DL, XLenVT, Idx, 5366 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT)); 5367 } 5368 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen); 5369 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec); 5370 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, 5371 Vec, ExtractElementIdx); 5372 // Extract the bit from GPR. 5373 SDValue ShiftRight = 5374 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx); 5375 return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight, 5376 DAG.getConstant(1, DL, XLenVT)); 5377 } 5378 } 5379 // Otherwise, promote to an i8 vector and extract from that. 5380 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 5381 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 5382 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 5383 } 5384 5385 // If this is a fixed vector, we need to convert it to a scalable vector. 5386 MVT ContainerVT = VecVT; 5387 if (VecVT.isFixedLengthVector()) { 5388 ContainerVT = getContainerForFixedLengthVector(VecVT); 5389 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 5390 } 5391 5392 // If the index is 0, the vector is already in the right position. 5393 if (!isNullConstant(Idx)) { 5394 // Use a VL of 1 to avoid processing more elements than we need. 
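// As a rough sketch for a constant index, say 7, on an integer vector:
//   vsetivli      zero, 1, ...        # VL=1
//   vslidedown.vi v8, vSrc, 7
//   vmv.x.s       a0, v8
// (illustrative only; vsetvli insertion and register allocation decide the
// final form).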
5395 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 5396 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 5397 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 5398 } 5399 5400 if (!EltVT.isInteger()) { 5401 // Floating-point extracts are handled in TableGen. 5402 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 5403 DAG.getConstant(0, DL, XLenVT)); 5404 } 5405 5406 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 5407 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 5408 } 5409 5410 // Some RVV intrinsics may claim that they want an integer operand to be 5411 // promoted or expanded. 5412 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, 5413 const RISCVSubtarget &Subtarget) { 5414 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 5415 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 5416 "Unexpected opcode"); 5417 5418 if (!Subtarget.hasVInstructions()) 5419 return SDValue(); 5420 5421 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 5422 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 5423 SDLoc DL(Op); 5424 5425 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 5426 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 5427 if (!II || !II->hasScalarOperand()) 5428 return SDValue(); 5429 5430 unsigned SplatOp = II->ScalarOperand + 1 + HasChain; 5431 assert(SplatOp < Op.getNumOperands()); 5432 5433 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 5434 SDValue &ScalarOp = Operands[SplatOp]; 5435 MVT OpVT = ScalarOp.getSimpleValueType(); 5436 MVT XLenVT = Subtarget.getXLenVT(); 5437 5438 // If this isn't a scalar, or its type is XLenVT, we're done. 5439 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 5440 return SDValue(); 5441 5442 // Simplest case is that the operand needs to be promoted to XLenVT. 5443 if (OpVT.bitsLT(XLenVT)) { 5444 // If the operand is a constant, sign extend to increase our chances 5445 // of being able to use a .vi instruction. ANY_EXTEND would become 5446 // a zero extend and the simm5 check in isel would fail. 5447 // FIXME: Should we ignore the upper bits in isel instead? 5448 unsigned ExtOpc = 5449 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 5450 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 5451 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 5452 } 5453 5454 // Use the previous operand to get the vXi64 VT. The result might be a mask 5455 // VT for compares. Using the previous operand assumes that the previous 5456 // operand will never have a smaller element size than a scalar operand and 5457 // that a widening operation never uses SEW=64. 5458 // NOTE: If this fails the assert below, we can probably just find the 5459 // element count from any operand or result and use it to construct the VT. 5460 assert(II->ScalarOperand > 0 && "Unexpected splat operand!"); 5461 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); 5462 5463 // The more complex case is when the scalar is larger than XLenVT. 5464 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && 5465 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); 5466 5467 // If this is a sign-extended 32-bit value, we can truncate it and rely on the 5468 // instruction to sign-extend since SEW>XLEN.
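// (The V spec sign-extends the scalar x-register operand of a .vx instruction
// to SEW when XLEN < SEW, so e.g. vadd.vx with SEW=64 on RV32 already handles
// a sign-extended 32-bit scalar correctly.)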
5469 if (DAG.ComputeNumSignBits(ScalarOp) > 32) { 5470 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp); 5471 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 5472 } 5473 5474 switch (IntNo) { 5475 case Intrinsic::riscv_vslide1up: 5476 case Intrinsic::riscv_vslide1down: 5477 case Intrinsic::riscv_vslide1up_mask: 5478 case Intrinsic::riscv_vslide1down_mask: { 5479 // We need to special case these when the scalar is larger than XLen. 5480 unsigned NumOps = Op.getNumOperands(); 5481 bool IsMasked = NumOps == 7; 5482 5483 // Convert the vector source to the equivalent nxvXi32 vector. 5484 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 5485 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]); 5486 5487 SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp, 5488 DAG.getConstant(0, DL, XLenVT)); 5489 SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp, 5490 DAG.getConstant(1, DL, XLenVT)); 5491 5492 // Double the VL since we halved SEW. 5493 SDValue AVL = getVLOperand(Op); 5494 SDValue I32VL; 5495 5496 // Optimize for a constant AVL. 5497 if (isa<ConstantSDNode>(AVL)) { 5498 unsigned EltSize = VT.getScalarSizeInBits(); 5499 unsigned MinSize = VT.getSizeInBits().getKnownMinValue(); 5500 5501 unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); 5502 unsigned MaxVLMAX = 5503 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); 5504 5505 unsigned VectorBitsMin = Subtarget.getRealMinVLen(); 5506 unsigned MinVLMAX = 5507 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); 5508 5509 uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue(); 5510 if (AVLInt <= MinVLMAX) { 5511 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); 5512 } else if (AVLInt >= 2 * MaxVLMAX) { 5513 // Just set vl to VLMAX in this situation. 5514 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT); 5515 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); 5516 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); 5517 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); 5518 SDValue SETVLMAX = DAG.getTargetConstant( 5519 Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32); 5520 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, 5521 LMUL); 5522 } else { 5523 // For an AVL in (MinVLMAX, 2 * MaxVLMAX) the actual working vl depends 5524 // on the hardware implementation, so fall through and let the 5525 // vsetvli-based code below compute it. 5526 } 5527 } 5528 if (!I32VL) { 5529 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); 5530 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); 5531 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits()); 5532 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); 5533 SDValue SETVL = 5534 DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32); 5535 // Use a vsetvli instruction to query the length actually used, which 5536 // depends on the hardware implementation. 5537 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL, 5538 SEW, LMUL); 5539 I32VL = 5540 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); 5541 } 5542 5543 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG); 5544 5545 // Shift the two scalar parts in using SEW=32 slide1up/slide1down 5546 // instructions.
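// Sketch of the slide1down case (slide1up inserts hi first, then lo):
//   vslide1down.vx vDst, vSrc, lo    # SEW=32, doubled VL
//   vslide1down.vx vDst, vDst, hi
// followed by a bitcast back to the SEW=64 type.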
5547 SDValue Passthru; 5548 if (IsMasked) 5549 Passthru = DAG.getUNDEF(I32VT); 5550 else 5551 Passthru = DAG.getBitcast(I32VT, Operands[1]); 5552 5553 if (IntNo == Intrinsic::riscv_vslide1up || 5554 IntNo == Intrinsic::riscv_vslide1up_mask) { 5555 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, 5556 ScalarHi, I32Mask, I32VL); 5557 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec, 5558 ScalarLo, I32Mask, I32VL); 5559 } else { 5560 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, 5561 ScalarLo, I32Mask, I32VL); 5562 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec, 5563 ScalarHi, I32Mask, I32VL); 5564 } 5565 5566 // Convert back to nxvXi64. 5567 Vec = DAG.getBitcast(VT, Vec); 5568 5569 if (!IsMasked) 5570 return Vec; 5571 // Apply mask after the operation. 5572 SDValue Mask = Operands[NumOps - 3]; 5573 SDValue MaskedOff = Operands[1]; 5574 // Assume the policy operand is the last operand. 5575 uint64_t Policy = 5576 cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue(); 5577 // We don't need to select maskedoff if it's undef. 5578 if (MaskedOff.isUndef()) 5579 return Vec; 5580 // TAMU 5581 if (Policy == RISCVII::TAIL_AGNOSTIC) 5582 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, 5583 AVL); 5584 // TUMA or TUMU: Currently we always emit the tumu policy regardless of tuma. 5585 // This is fine because vmerge does not care about the mask policy. 5586 return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, 5587 AVL); 5588 } 5589 } 5590 5591 // We need to convert the scalar to a splat vector. 5592 SDValue VL = getVLOperand(Op); 5593 assert(VL.getValueType() == XLenVT); 5594 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG); 5595 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 5596 } 5597 5598 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 5599 SelectionDAG &DAG) const { 5600 unsigned IntNo = Op.getConstantOperandVal(0); 5601 SDLoc DL(Op); 5602 MVT XLenVT = Subtarget.getXLenVT(); 5603 5604 switch (IntNo) { 5605 default: 5606 break; // Don't custom lower most intrinsics. 5607 case Intrinsic::thread_pointer: { 5608 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 5609 return DAG.getRegister(RISCV::X4, PtrVT); 5610 } 5611 case Intrinsic::riscv_orc_b: 5612 case Intrinsic::riscv_brev8: { 5613 unsigned Opc = 5614 IntNo == Intrinsic::riscv_brev8 ? RISCVISD::BREV8 : RISCVISD::ORC_B; 5615 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 5616 } 5617 case Intrinsic::riscv_zip: 5618 case Intrinsic::riscv_unzip: { 5619 unsigned Opc = 5620 IntNo == Intrinsic::riscv_zip ?
RISCVISD::ZIP : RISCVISD::UNZIP; 5621 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); 5622 } 5623 case Intrinsic::riscv_vmv_x_s: 5624 assert(Op.getValueType() == XLenVT && "Unexpected VT!"); 5625 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), 5626 Op.getOperand(1)); 5627 case Intrinsic::riscv_vfmv_f_s: 5628 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), 5629 Op.getOperand(1), DAG.getConstant(0, DL, XLenVT)); 5630 case Intrinsic::riscv_vmv_v_x: 5631 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), 5632 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG, 5633 Subtarget); 5634 case Intrinsic::riscv_vfmv_v_f: 5635 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 5636 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 5637 case Intrinsic::riscv_vmv_s_x: { 5638 SDValue Scalar = Op.getOperand(2); 5639 5640 if (Scalar.getValueType().bitsLE(XLenVT)) { 5641 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); 5642 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), 5643 Op.getOperand(1), Scalar, Op.getOperand(3)); 5644 } 5645 5646 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 5647 5648 // This is an i64 value that lives in two scalar registers. We have to 5649 // insert this in a convoluted way. First we build vXi64 splat containing 5650 // the two values that we assemble using some bit math. Next we'll use 5651 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask 5652 // to merge element 0 from our splat into the source vector. 5653 // FIXME: This is probably not the best way to do this, but it is 5654 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 5655 // point. 5656 // sw lo, (a0) 5657 // sw hi, 4(a0) 5658 // vlse vX, (a0) 5659 // 5660 // vid.v vVid 5661 // vmseq.vx mMask, vVid, 0 5662 // vmerge.vvm vDest, vSrc, vVal, mMask 5663 MVT VT = Op.getSimpleValueType(); 5664 SDValue Vec = Op.getOperand(1); 5665 SDValue VL = getVLOperand(Op); 5666 5667 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG); 5668 if (Op.getOperand(1).isUndef()) 5669 return SplattedVal; 5670 SDValue SplattedIdx = 5671 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 5672 DAG.getConstant(0, DL, MVT::i32), VL); 5673 5674 MVT MaskVT = getMaskTypeFor(VT); 5675 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG); 5676 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 5677 SDValue SelectCond = 5678 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 5679 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), 5680 DAG.getUNDEF(MaskVT), Mask, VL}); 5681 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, 5682 Vec, VL); 5683 } 5684 } 5685 5686 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 5687 } 5688 5689 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 5690 SelectionDAG &DAG) const { 5691 unsigned IntNo = Op.getConstantOperandVal(1); 5692 switch (IntNo) { 5693 default: 5694 break; 5695 case Intrinsic::riscv_masked_strided_load: { 5696 SDLoc DL(Op); 5697 MVT XLenVT = Subtarget.getXLenVT(); 5698 5699 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 5700 // the selection of the masked intrinsics doesn't do this for us. 
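// In other words, a riscv.masked.strided.load whose mask is a constant
// all-ones splat is emitted through riscv_vlse below instead of
// riscv_vlse_mask.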
5701 SDValue Mask = Op.getOperand(5); 5702 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 5703 5704 MVT VT = Op->getSimpleValueType(0); 5705 MVT ContainerVT = VT; 5706 if (VT.isFixedLengthVector()) 5707 ContainerVT = getContainerForFixedLengthVector(VT); 5708 5709 SDValue PassThru = Op.getOperand(2); 5710 if (!IsUnmasked) { 5711 MVT MaskVT = getMaskTypeFor(ContainerVT); 5712 if (VT.isFixedLengthVector()) { 5713 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5714 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 5715 } 5716 } 5717 5718 auto *Load = cast<MemIntrinsicSDNode>(Op); 5719 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 5720 SDValue Ptr = Op.getOperand(3); 5721 SDValue Stride = Op.getOperand(4); 5722 SDValue Result, Chain; 5723 5724 // TODO: We currently restrict this to unmasked loads in consideration of 5725 // the complexity of handling all-false masks. 5726 if (IsUnmasked && isNullConstant(Stride)) { 5727 MVT ScalarVT = ContainerVT.getVectorElementType(); 5728 SDValue ScalarLoad = 5729 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr, 5730 ScalarVT, Load->getMemOperand()); 5731 Chain = ScalarLoad.getValue(1); 5732 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG, 5733 Subtarget); 5734 } else { 5735 SDValue IntID = DAG.getTargetConstant( 5736 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, 5737 XLenVT); 5738 5739 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; 5740 if (IsUnmasked) 5741 Ops.push_back(DAG.getUNDEF(ContainerVT)); 5742 else 5743 Ops.push_back(PassThru); 5744 Ops.push_back(Ptr); 5745 Ops.push_back(Stride); 5746 if (!IsUnmasked) 5747 Ops.push_back(Mask); 5748 Ops.push_back(VL); 5749 if (!IsUnmasked) { 5750 SDValue Policy = 5751 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 5752 Ops.push_back(Policy); 5753 } 5754 5755 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 5756 Result = 5757 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 5758 Load->getMemoryVT(), Load->getMemOperand()); 5759 Chain = Result.getValue(1); 5760 } 5761 if (VT.isFixedLengthVector()) 5762 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 5763 return DAG.getMergeValues({Result, Chain}, DL); 5764 } 5765 case Intrinsic::riscv_seg2_load: 5766 case Intrinsic::riscv_seg3_load: 5767 case Intrinsic::riscv_seg4_load: 5768 case Intrinsic::riscv_seg5_load: 5769 case Intrinsic::riscv_seg6_load: 5770 case Intrinsic::riscv_seg7_load: 5771 case Intrinsic::riscv_seg8_load: { 5772 SDLoc DL(Op); 5773 static const Intrinsic::ID VlsegInts[7] = { 5774 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, 5775 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, 5776 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, 5777 Intrinsic::riscv_vlseg8}; 5778 unsigned NF = Op->getNumValues() - 1; 5779 assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); 5780 MVT XLenVT = Subtarget.getXLenVT(); 5781 MVT VT = Op->getSimpleValueType(0); 5782 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5783 5784 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); 5785 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); 5786 auto *Load = cast<MemIntrinsicSDNode>(Op); 5787 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT); 5788 ContainerVTs.push_back(MVT::Other); 5789 SDVTList VTs = DAG.getVTList(ContainerVTs); 5790 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID}; 5791 Ops.insert(Ops.end(), NF,
DAG.getUNDEF(ContainerVT)); 5792 Ops.push_back(Op.getOperand(2)); 5793 Ops.push_back(VL); 5794 SDValue Result = 5795 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 5796 Load->getMemoryVT(), Load->getMemOperand()); 5797 SmallVector<SDValue, 9> Results; 5798 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) 5799 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx), 5800 DAG, Subtarget)); 5801 Results.push_back(Result.getValue(NF)); 5802 return DAG.getMergeValues(Results, DL); 5803 } 5804 } 5805 5806 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget); 5807 } 5808 5809 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 5810 SelectionDAG &DAG) const { 5811 unsigned IntNo = Op.getConstantOperandVal(1); 5812 switch (IntNo) { 5813 default: 5814 break; 5815 case Intrinsic::riscv_masked_strided_store: { 5816 SDLoc DL(Op); 5817 MVT XLenVT = Subtarget.getXLenVT(); 5818 5819 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 5820 // the selection of the masked intrinsics doesn't do this for us. 5821 SDValue Mask = Op.getOperand(5); 5822 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 5823 5824 SDValue Val = Op.getOperand(2); 5825 MVT VT = Val.getSimpleValueType(); 5826 MVT ContainerVT = VT; 5827 if (VT.isFixedLengthVector()) { 5828 ContainerVT = getContainerForFixedLengthVector(VT); 5829 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 5830 } 5831 if (!IsUnmasked) { 5832 MVT MaskVT = getMaskTypeFor(ContainerVT); 5833 if (VT.isFixedLengthVector()) 5834 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5835 } 5836 5837 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 5838 5839 SDValue IntID = DAG.getTargetConstant( 5840 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, 5841 XLenVT); 5842 5843 auto *Store = cast<MemIntrinsicSDNode>(Op); 5844 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; 5845 Ops.push_back(Val); 5846 Ops.push_back(Op.getOperand(3)); // Ptr 5847 Ops.push_back(Op.getOperand(4)); // Stride 5848 if (!IsUnmasked) 5849 Ops.push_back(Mask); 5850 Ops.push_back(VL); 5851 5852 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), 5853 Ops, Store->getMemoryVT(), 5854 Store->getMemOperand()); 5855 } 5856 } 5857 5858 return SDValue(); 5859 } 5860 5861 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 5862 switch (ISDOpcode) { 5863 default: 5864 llvm_unreachable("Unhandled reduction"); 5865 case ISD::VECREDUCE_ADD: 5866 return RISCVISD::VECREDUCE_ADD_VL; 5867 case ISD::VECREDUCE_UMAX: 5868 return RISCVISD::VECREDUCE_UMAX_VL; 5869 case ISD::VECREDUCE_SMAX: 5870 return RISCVISD::VECREDUCE_SMAX_VL; 5871 case ISD::VECREDUCE_UMIN: 5872 return RISCVISD::VECREDUCE_UMIN_VL; 5873 case ISD::VECREDUCE_SMIN: 5874 return RISCVISD::VECREDUCE_SMIN_VL; 5875 case ISD::VECREDUCE_AND: 5876 return RISCVISD::VECREDUCE_AND_VL; 5877 case ISD::VECREDUCE_OR: 5878 return RISCVISD::VECREDUCE_OR_VL; 5879 case ISD::VECREDUCE_XOR: 5880 return RISCVISD::VECREDUCE_XOR_VL; 5881 } 5882 } 5883 5884 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, 5885 SelectionDAG &DAG, 5886 bool IsVP) const { 5887 SDLoc DL(Op); 5888 SDValue Vec = Op.getOperand(IsVP ? 
1 : 0); 5889 MVT VecVT = Vec.getSimpleValueType(); 5890 assert((Op.getOpcode() == ISD::VECREDUCE_AND || 5891 Op.getOpcode() == ISD::VECREDUCE_OR || 5892 Op.getOpcode() == ISD::VECREDUCE_XOR || 5893 Op.getOpcode() == ISD::VP_REDUCE_AND || 5894 Op.getOpcode() == ISD::VP_REDUCE_OR || 5895 Op.getOpcode() == ISD::VP_REDUCE_XOR) && 5896 "Unexpected reduction lowering"); 5897 5898 MVT XLenVT = Subtarget.getXLenVT(); 5899 assert(Op.getValueType() == XLenVT && 5900 "Expected reduction output to be legalized to XLenVT"); 5901 5902 MVT ContainerVT = VecVT; 5903 if (VecVT.isFixedLengthVector()) { 5904 ContainerVT = getContainerForFixedLengthVector(VecVT); 5905 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 5906 } 5907 5908 SDValue Mask, VL; 5909 if (IsVP) { 5910 Mask = Op.getOperand(2); 5911 VL = Op.getOperand(3); 5912 } else { 5913 std::tie(Mask, VL) = 5914 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 5915 } 5916 5917 unsigned BaseOpc; 5918 ISD::CondCode CC; 5919 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 5920 5921 switch (Op.getOpcode()) { 5922 default: 5923 llvm_unreachable("Unhandled reduction"); 5924 case ISD::VECREDUCE_AND: 5925 case ISD::VP_REDUCE_AND: { 5926 // vcpop ~x == 0 5927 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 5928 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); 5929 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 5930 CC = ISD::SETEQ; 5931 BaseOpc = ISD::AND; 5932 break; 5933 } 5934 case ISD::VECREDUCE_OR: 5935 case ISD::VP_REDUCE_OR: 5936 // vcpop x != 0 5937 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 5938 CC = ISD::SETNE; 5939 BaseOpc = ISD::OR; 5940 break; 5941 case ISD::VECREDUCE_XOR: 5942 case ISD::VP_REDUCE_XOR: { 5943 // ((vcpop x) & 1) != 0 5944 SDValue One = DAG.getConstant(1, DL, XLenVT); 5945 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 5946 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); 5947 CC = ISD::SETNE; 5948 BaseOpc = ISD::XOR; 5949 break; 5950 } 5951 } 5952 5953 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); 5954 5955 if (!IsVP) 5956 return SetCC; 5957 5958 // Now include the start value in the operation. 5959 // Note that we must return the start value when no elements are operated 5960 // upon. The vcpop instructions we've emitted in each case above will return 5961 // 0 for an inactive vector, and so we've already received the neutral value: 5962 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we 5963 // can simply include the start value. 
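// For example (illustrative): a VP_REDUCE_AND with start value 1 over active
// mask elements {1, 1, 0, 1} gives vcpop(~x) == 1, so the SETEQ above produces
// 0, and ANDing that with the start value yields 0, as expected.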
5964 return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0)); 5965 } 5966 5967 static bool hasNonZeroAVL(SDValue AVL) { 5968 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL); 5969 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL); 5970 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || 5971 (ImmAVL && ImmAVL->getZExtValue() >= 1); 5972 } 5973 5974 /// Helper to lower a reduction sequence of the form: 5975 /// scalar = reduce_op vec, scalar_start 5976 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, 5977 SDValue StartValue, SDValue Vec, SDValue Mask, 5978 SDValue VL, SDLoc DL, SelectionDAG &DAG, 5979 const RISCVSubtarget &Subtarget) { 5980 const MVT VecVT = Vec.getSimpleValueType(); 5981 const MVT M1VT = getLMUL1VT(VecVT); 5982 const MVT XLenVT = Subtarget.getXLenVT(); 5983 const bool NonZeroAVL = hasNonZeroAVL(VL); 5984 5985 // The reduction needs an LMUL1 input; do the splat at either LMUL1 5986 // or the original VT if fractional. 5987 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT; 5988 // We reuse the VL of the reduction to reduce vsetvli toggles if we can 5989 // prove it is non-zero. For the AVL=0 case, we need the scalar to 5990 // be the result of the reduction operation. 5991 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT); 5992 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, 5993 DAG, Subtarget); 5994 if (M1VT != InnerVT) 5995 InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, 5996 DAG.getUNDEF(M1VT), 5997 InitialValue, DAG.getConstant(0, DL, XLenVT)); 5998 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue; 5999 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, PassThru, Vec, 6000 InitialValue, Mask, VL); 6001 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, 6002 DAG.getConstant(0, DL, XLenVT)); 6003 } 6004 6005 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 6006 SelectionDAG &DAG) const { 6007 SDLoc DL(Op); 6008 SDValue Vec = Op.getOperand(0); 6009 EVT VecEVT = Vec.getValueType(); 6010 6011 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 6012 6013 // Due to ordering in legalize types we may have a vector type that needs to 6014 // be split. Do that manually so we can get down to a legal type. 6015 while (getTypeAction(*DAG.getContext(), VecEVT) == 6016 TargetLowering::TypeSplitVector) { 6017 auto [Lo, Hi] = DAG.SplitVector(Vec, DL); 6018 VecEVT = Lo.getValueType(); 6019 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 6020 } 6021 6022 // TODO: The type may need to be widened rather than split. Or widened before 6023 // it can be split. 
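// Illustrative example: if VecEVT is twice the widest legal fixed-length
// vector type, a single split above produces two legal halves that are
// combined with BaseOpc (e.g. ADD for VECREDUCE_ADD) before the container
// lowering below reduces the remaining legal vector.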
6024 if (!isTypeLegal(VecEVT)) 6025 return SDValue(); 6026 6027 MVT VecVT = VecEVT.getSimpleVT(); 6028 MVT VecEltVT = VecVT.getVectorElementType(); 6029 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 6030 6031 MVT ContainerVT = VecVT; 6032 if (VecVT.isFixedLengthVector()) { 6033 ContainerVT = getContainerForFixedLengthVector(VecVT); 6034 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 6035 } 6036 6037 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 6038 6039 SDValue NeutralElem = 6040 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 6041 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec, 6042 Mask, VL, DL, DAG, Subtarget); 6043 } 6044 6045 // Given a reduction op, this function returns the matching reduction opcode, 6046 // the vector SDValue and the scalar SDValue required to lower this to a 6047 // RISCVISD node. 6048 static std::tuple<unsigned, SDValue, SDValue> 6049 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 6050 SDLoc DL(Op); 6051 auto Flags = Op->getFlags(); 6052 unsigned Opcode = Op.getOpcode(); 6053 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode); 6054 switch (Opcode) { 6055 default: 6056 llvm_unreachable("Unhandled reduction"); 6057 case ISD::VECREDUCE_FADD: { 6058 // Use positive zero if we can. It is cheaper to materialize. 6059 SDValue Zero = 6060 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); 6061 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); 6062 } 6063 case ISD::VECREDUCE_SEQ_FADD: 6064 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 6065 Op.getOperand(0)); 6066 case ISD::VECREDUCE_FMIN: 6067 return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0), 6068 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); 6069 case ISD::VECREDUCE_FMAX: 6070 return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0), 6071 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); 6072 } 6073 } 6074 6075 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 6076 SelectionDAG &DAG) const { 6077 SDLoc DL(Op); 6078 MVT VecEltVT = Op.getSimpleValueType(); 6079 6080 unsigned RVVOpcode; 6081 SDValue VectorVal, ScalarVal; 6082 std::tie(RVVOpcode, VectorVal, ScalarVal) = 6083 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 6084 MVT VecVT = VectorVal.getSimpleValueType(); 6085 6086 MVT ContainerVT = VecVT; 6087 if (VecVT.isFixedLengthVector()) { 6088 ContainerVT = getContainerForFixedLengthVector(VecVT); 6089 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 6090 } 6091 6092 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 6093 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal, 6094 VectorVal, Mask, VL, DL, DAG, Subtarget); 6095 } 6096 6097 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) { 6098 switch (ISDOpcode) { 6099 default: 6100 llvm_unreachable("Unhandled reduction"); 6101 case ISD::VP_REDUCE_ADD: 6102 return RISCVISD::VECREDUCE_ADD_VL; 6103 case ISD::VP_REDUCE_UMAX: 6104 return RISCVISD::VECREDUCE_UMAX_VL; 6105 case ISD::VP_REDUCE_SMAX: 6106 return RISCVISD::VECREDUCE_SMAX_VL; 6107 case ISD::VP_REDUCE_UMIN: 6108 return RISCVISD::VECREDUCE_UMIN_VL; 6109 case ISD::VP_REDUCE_SMIN: 6110 return RISCVISD::VECREDUCE_SMIN_VL; 6111 case ISD::VP_REDUCE_AND: 6112 return RISCVISD::VECREDUCE_AND_VL; 6113 case ISD::VP_REDUCE_OR: 6114 return RISCVISD::VECREDUCE_OR_VL; 6115 case 
ISD::VP_REDUCE_XOR: 6116 return RISCVISD::VECREDUCE_XOR_VL; 6117 case ISD::VP_REDUCE_FADD: 6118 return RISCVISD::VECREDUCE_FADD_VL; 6119 case ISD::VP_REDUCE_SEQ_FADD: 6120 return RISCVISD::VECREDUCE_SEQ_FADD_VL; 6121 case ISD::VP_REDUCE_FMAX: 6122 return RISCVISD::VECREDUCE_FMAX_VL; 6123 case ISD::VP_REDUCE_FMIN: 6124 return RISCVISD::VECREDUCE_FMIN_VL; 6125 } 6126 } 6127 6128 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, 6129 SelectionDAG &DAG) const { 6130 SDLoc DL(Op); 6131 SDValue Vec = Op.getOperand(1); 6132 EVT VecEVT = Vec.getValueType(); 6133 6134 // TODO: The type may need to be widened rather than split. Or widened before 6135 // it can be split. 6136 if (!isTypeLegal(VecEVT)) 6137 return SDValue(); 6138 6139 MVT VecVT = VecEVT.getSimpleVT(); 6140 unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode()); 6141 6142 if (VecVT.isFixedLengthVector()) { 6143 auto ContainerVT = getContainerForFixedLengthVector(VecVT); 6144 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 6145 } 6146 6147 SDValue VL = Op.getOperand(3); 6148 SDValue Mask = Op.getOperand(2); 6149 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0), 6150 Vec, Mask, VL, DL, DAG, Subtarget); 6151 } 6152 6153 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 6154 SelectionDAG &DAG) const { 6155 SDValue Vec = Op.getOperand(0); 6156 SDValue SubVec = Op.getOperand(1); 6157 MVT VecVT = Vec.getSimpleValueType(); 6158 MVT SubVecVT = SubVec.getSimpleValueType(); 6159 6160 SDLoc DL(Op); 6161 MVT XLenVT = Subtarget.getXLenVT(); 6162 unsigned OrigIdx = Op.getConstantOperandVal(2); 6163 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6164 6165 // We don't have the ability to slide mask vectors up indexed by their i1 6166 // elements; the smallest we can do is i8. Often we are able to bitcast to 6167 // equivalent i8 vectors. Note that when inserting a fixed-length vector 6168 // into a scalable one, we might not necessarily have enough scalable 6169 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 6170 if (SubVecVT.getVectorElementType() == MVT::i1 && 6171 (OrigIdx != 0 || !Vec.isUndef())) { 6172 if (VecVT.getVectorMinNumElements() >= 8 && 6173 SubVecVT.getVectorMinNumElements() >= 8) { 6174 assert(OrigIdx % 8 == 0 && "Invalid index"); 6175 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 6176 SubVecVT.getVectorMinNumElements() % 8 == 0 && 6177 "Unexpected mask vector lowering"); 6178 OrigIdx /= 8; 6179 SubVecVT = 6180 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 6181 SubVecVT.isScalableVector()); 6182 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 6183 VecVT.isScalableVector()); 6184 Vec = DAG.getBitcast(VecVT, Vec); 6185 SubVec = DAG.getBitcast(SubVecVT, SubVec); 6186 } else { 6187 // We can't slide this mask vector up indexed by its i1 elements. 6188 // This poses a problem when we wish to insert a scalable vector which 6189 // can't be re-expressed as a larger type. Just choose the slow path and 6190 // extend to a larger type, then truncate back down. 
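// For example (illustrative): inserting v4i1 into nxv1i1 zero-extends both
// operands to i8-element vectors, performs the INSERT_SUBVECTOR there, and
// then compares the result against zero to recover the i1 vector.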
6191 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
6192 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
6193 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
6194 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
6195 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
6196 Op.getOperand(2));
6197 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
6198 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
6199 }
6200 }
6201
6202 // If the subvector is a fixed-length type, we cannot use subregister
6203 // manipulation to simplify the codegen; we don't know which register of a
6204 // LMUL group contains the specific subvector as we only know the minimum
6205 // register size. Therefore we must slide the vector group up the full
6206 // amount.
6207 if (SubVecVT.isFixedLengthVector()) {
6208 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
6209 return Op;
6210 MVT ContainerVT = VecVT;
6211 if (VecVT.isFixedLengthVector()) {
6212 ContainerVT = getContainerForFixedLengthVector(VecVT);
6213 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6214 }
6215 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
6216 DAG.getUNDEF(ContainerVT), SubVec,
6217 DAG.getConstant(0, DL, XLenVT));
6218 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
6219 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
6220 return DAG.getBitcast(Op.getValueType(), SubVec);
6221 }
6222 SDValue Mask =
6223 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
6224 // Set the vector length to only the number of elements we care about. Note
6225 // that for slideup this includes the offset.
6226 SDValue VL =
6227 getVLOp(OrigIdx + SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
6228 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
6229
6230 // Use tail agnostic policy if OrigIdx is the last index of Vec.
6231 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
6232 if (VecVT.isFixedLengthVector() &&
6233 OrigIdx + 1 == VecVT.getVectorNumElements())
6234 Policy = RISCVII::TAIL_AGNOSTIC;
6235 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
6236 SlideupAmt, Mask, VL, Policy);
6237 if (VecVT.isFixedLengthVector())
6238 Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
6239 return DAG.getBitcast(Op.getValueType(), Slideup);
6240 }
6241
6242 unsigned SubRegIdx, RemIdx;
6243 std::tie(SubRegIdx, RemIdx) =
6244 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
6245 VecVT, SubVecVT, OrigIdx, TRI);
6246
6247 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
6248 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
6249 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
6250 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
6251
6252 // 1. If the Idx has been completely eliminated and this subvector's size is
6253 // a vector register or a multiple thereof, or the surrounding elements are
6254 // undef, then this is a subvector insert which naturally aligns to a vector
6255 // register. These can easily be handled using subregister manipulation.
6256 // 2. If the subvector is smaller than a vector register, then the insertion
6257 // must preserve the undisturbed elements of the register.
We do this by 6258 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 6259 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 6260 // subvector within the vector register, and an INSERT_SUBVECTOR of that 6261 // LMUL=1 type back into the larger vector (resolving to another subregister 6262 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 6263 // to avoid allocating a large register group to hold our subvector. 6264 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 6265 return Op; 6266 6267 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 6268 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 6269 // (in our case undisturbed). This means we can set up a subvector insertion 6270 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 6271 // size of the subvector. 6272 MVT InterSubVT = VecVT; 6273 SDValue AlignedExtract = Vec; 6274 unsigned AlignedIdx = OrigIdx - RemIdx; 6275 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 6276 InterSubVT = getLMUL1VT(VecVT); 6277 // Extract a subvector equal to the nearest full vector register type. This 6278 // should resolve to a EXTRACT_SUBREG instruction. 6279 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 6280 DAG.getConstant(AlignedIdx, DL, XLenVT)); 6281 } 6282 6283 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT); 6284 // For scalable vectors this must be further multiplied by vscale. 6285 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 6286 6287 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 6288 6289 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 6290 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 6291 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 6292 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 6293 6294 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 6295 DAG.getUNDEF(InterSubVT), SubVec, 6296 DAG.getConstant(0, DL, XLenVT)); 6297 6298 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, 6299 SubVec, SlideupAmt, Mask, VL); 6300 6301 // If required, insert this subvector back into the correct vector register. 6302 // This should resolve to an INSERT_SUBREG instruction. 6303 if (VecVT.bitsGT(InterSubVT)) 6304 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 6305 DAG.getConstant(AlignedIdx, DL, XLenVT)); 6306 6307 // We might have bitcast from a mask type: cast back to the original type if 6308 // required. 6309 return DAG.getBitcast(Op.getSimpleValueType(), Slideup); 6310 } 6311 6312 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 6313 SelectionDAG &DAG) const { 6314 SDValue Vec = Op.getOperand(0); 6315 MVT SubVecVT = Op.getSimpleValueType(); 6316 MVT VecVT = Vec.getSimpleValueType(); 6317 6318 SDLoc DL(Op); 6319 MVT XLenVT = Subtarget.getXLenVT(); 6320 unsigned OrigIdx = Op.getConstantOperandVal(1); 6321 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 6322 6323 // We don't have the ability to slide mask vectors down indexed by their i1 6324 // elements; the smallest we can do is i8. Often we are able to bitcast to 6325 // equivalent i8 vectors. Note that when extracting a fixed-length vector 6326 // from a scalable one, we might not necessarily have enough scalable 6327 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 
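// Illustrative example: extracting nxv8i1 at index 16 from nxv64i1 can be
// re-expressed as extracting nxv1i8 at index 2 from nxv8i8 after bitcasting,
// since 8 mask bits pack into one i8 element; this requires the index and
// both minimum element counts to be multiples of 8, as asserted below.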
6328 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
6329 if (VecVT.getVectorMinNumElements() >= 8 &&
6330 SubVecVT.getVectorMinNumElements() >= 8) {
6331 assert(OrigIdx % 8 == 0 && "Invalid index");
6332 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
6333 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
6334 "Unexpected mask vector lowering");
6335 OrigIdx /= 8;
6336 SubVecVT =
6337 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
6338 SubVecVT.isScalableVector());
6339 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
6340 VecVT.isScalableVector());
6341 Vec = DAG.getBitcast(VecVT, Vec);
6342 } else {
6343 // We can't slide this mask vector down indexed by its i1 elements.
6344 // This poses a problem when we wish to extract a scalable vector which
6345 // can't be re-expressed as a larger type. Just choose the slow path and
6346 // extend to a larger type, then truncate back down.
6347 // TODO: We could probably improve this when extracting certain fixed-length
6348 // vectors from fixed-length vectors, where we can extract as i8 and shift
6349 // the correct element right to reach the desired subvector.
6350 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
6351 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
6352 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
6353 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
6354 Op.getOperand(1));
6355 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
6356 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
6357 }
6358 }
6359
6360 // If the subvector is a fixed-length type, we cannot use subregister
6361 // manipulation to simplify the codegen; we don't know which register of a
6362 // LMUL group contains the specific subvector as we only know the minimum
6363 // register size. Therefore we must slide the vector group down the full
6364 // amount.
6365 if (SubVecVT.isFixedLengthVector()) {
6366 // With an index of 0 this is a cast-like subvector, which can be performed
6367 // with subregister operations.
6368 if (OrigIdx == 0)
6369 return Op;
6370 MVT ContainerVT = VecVT;
6371 if (VecVT.isFixedLengthVector()) {
6372 ContainerVT = getContainerForFixedLengthVector(VecVT);
6373 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6374 }
6375 SDValue Mask =
6376 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
6377 // Set the vector length to only the number of elements we care about. This
6378 // avoids sliding down elements we're going to discard straight away.
6379 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
6380 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
6381 SDValue Slidedown =
6382 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
6383 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
6384 // Now we can use a cast-like subvector extract to get the result.
6385 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
6386 DAG.getConstant(0, DL, XLenVT));
6387 return DAG.getBitcast(Op.getValueType(), Slidedown);
6388 }
6389
6390 unsigned SubRegIdx, RemIdx;
6391 std::tie(SubRegIdx, RemIdx) =
6392 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
6393 VecVT, SubVecVT, OrigIdx, TRI);
6394
6395 // If the Idx has been completely eliminated then this is a subvector extract
6396 // which naturally aligns to a vector register. These can easily be handled
6397 // using subregister manipulation.
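// For example (illustrative): extracting nxv2i32 at index 2 from nxv8i32
// decomposes to a whole-register subregister index with RemIdx == 0, since
// nxv2i32 is exactly one LMUL=1 register, so no slide is required.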
6398 if (RemIdx == 0)
6399 return Op;
6400
6401 // Else we must shift our vector register directly to extract the subvector.
6402 // Do this using VSLIDEDOWN.
6403
6404 // If the vector type is an LMUL-group type, extract a subvector equal to the
6405 // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
6406 // instruction.
6407 MVT InterSubVT = VecVT;
6408 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
6409 InterSubVT = getLMUL1VT(VecVT);
6410 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
6411 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
6412 }
6413
6414 // Slide this vector register down by the desired number of elements in order
6415 // to place the desired subvector starting at element 0.
6416 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
6417 // For scalable vectors this must be further multiplied by vscale.
6418 SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
6419
6420 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
6421 SDValue Slidedown =
6422 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
6423 Vec, SlidedownAmt, Mask, VL);
6424
6425 // Now the vector is in the right position, extract our final subvector. This
6426 // should resolve to a COPY.
6427 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
6428 DAG.getConstant(0, DL, XLenVT));
6429
6430 // We might have bitcast from a mask type: cast back to the original type if
6431 // required.
6432 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
6433 }
6434
6435 // Lower step_vector to the vid instruction. Any non-identity step value must
6436 // be accounted for by manual expansion.
6437 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
6438 SelectionDAG &DAG) const {
6439 SDLoc DL(Op);
6440 MVT VT = Op.getSimpleValueType();
6441 assert(VT.isScalableVector() && "Expected scalable vector");
6442 MVT XLenVT = Subtarget.getXLenVT();
6443 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
6444 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
6445 uint64_t StepValImm = Op.getConstantOperandVal(0);
6446 if (StepValImm != 1) {
6447 if (isPowerOf2_64(StepValImm)) {
6448 SDValue StepVal =
6449 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
6450 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
6451 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
6452 } else {
6453 SDValue StepVal = lowerScalarSplat(
6454 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
6455 VL, VT, DL, DAG, Subtarget);
6456 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
6457 }
6458 }
6459 return StepVec;
6460 }
6461
6462 // Implement vector_reverse using vrgather.vv with indices determined by
6463 // subtracting the id of each element from (VLMAX-1). This will convert
6464 // the indices like so:
6465 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
6466 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
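// Mask (i1) vectors are handled below by zero-extending to i8, reversing,
// and truncating back, since vrgather operates on SEW-sized elements rather
// than mask bits. Illustrative example: when VLMAX is 8, vid.v produces
// (0, 1, ..., 7) and the gather indices become (7, 6, ..., 0).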
6467 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
6468 SelectionDAG &DAG) const {
6469 SDLoc DL(Op);
6470 MVT VecVT = Op.getSimpleValueType();
6471 if (VecVT.getVectorElementType() == MVT::i1) {
6472 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
6473 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
6474 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
6475 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
6476 }
6477 unsigned EltSize = VecVT.getScalarSizeInBits();
6478 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
6479 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
6480 unsigned MaxVLMAX =
6481 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
6482
6483 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
6484 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
6485
6486 // If this is SEW=8 and VLMAX is potentially more than 256, we need
6487 // to use vrgatherei16.vv.
6488 // TODO: It's also possible to use vrgatherei16.vv for other types to
6489 // decrease register width for the index calculation.
6490 if (MaxVLMAX > 256 && EltSize == 8) {
6491 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
6492 // Reverse each half, then reassemble them in reverse order.
6493 // NOTE: It's also possible that after splitting, VLMAX no longer
6494 // requires vrgatherei16.vv.
6495 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
6496 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
6497 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
6498 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
6499 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
6500 // Reassemble the low and high pieces reversed.
6501 // FIXME: This is a CONCAT_VECTORS.
6502 SDValue Res =
6503 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
6504 DAG.getIntPtrConstant(0, DL));
6505 return DAG.getNode(
6506 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
6507 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
6508 }
6509
6510 // Just promote the int type to i16 which will double the LMUL.
6511 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
6512 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
6513 }
6514
6515 MVT XLenVT = Subtarget.getXLenVT();
6516 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
6517
6518 // Calculate VLMAX-1 for the desired SEW.
6519 unsigned MinElts = VecVT.getVectorMinNumElements();
6520 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
6521 getVLOp(MinElts, DL, DAG, Subtarget));
6522 SDValue VLMinus1 =
6523 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
6524
6525 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
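// On RV32 with i64 elements we cannot splat the XLEN-sized VLMinus1 with a
// plain SPLAT_VECTOR, so we emit VMV_V_X_VL with VL=VLMAX (X0) instead;
// vmv.v.x sign-extends its scalar to SEW, which is correct here because
// VLMAX-1 is non-negative and fits in XLEN.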
6526 bool IsRV32E64 = 6527 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 6528 SDValue SplatVL; 6529 if (!IsRV32E64) 6530 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 6531 else 6532 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), 6533 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT)); 6534 6535 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 6536 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, 6537 DAG.getUNDEF(IntVT), Mask, VL); 6538 6539 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, 6540 DAG.getUNDEF(VecVT), Mask, VL); 6541 } 6542 6543 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op, 6544 SelectionDAG &DAG) const { 6545 SDLoc DL(Op); 6546 SDValue V1 = Op.getOperand(0); 6547 SDValue V2 = Op.getOperand(1); 6548 MVT XLenVT = Subtarget.getXLenVT(); 6549 MVT VecVT = Op.getSimpleValueType(); 6550 6551 unsigned MinElts = VecVT.getVectorMinNumElements(); 6552 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 6553 getVLOp(MinElts, DL, DAG, Subtarget)); 6554 6555 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); 6556 SDValue DownOffset, UpOffset; 6557 if (ImmValue >= 0) { 6558 // The operand is a TargetConstant, we need to rebuild it as a regular 6559 // constant. 6560 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT); 6561 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset); 6562 } else { 6563 // The operand is a TargetConstant, we need to rebuild it as a regular 6564 // constant rather than negating the original operand. 6565 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT); 6566 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset); 6567 } 6568 6569 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG); 6570 6571 SDValue SlideDown = 6572 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, 6573 DownOffset, TrueMask, UpOffset); 6574 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset, 6575 TrueMask, DAG.getRegister(RISCV::X0, XLenVT), 6576 RISCVII::TAIL_AGNOSTIC); 6577 } 6578 6579 SDValue 6580 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 6581 SelectionDAG &DAG) const { 6582 SDLoc DL(Op); 6583 auto *Load = cast<LoadSDNode>(Op); 6584 6585 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 6586 Load->getMemoryVT(), 6587 *Load->getMemOperand()) && 6588 "Expecting a correctly-aligned load"); 6589 6590 MVT VT = Op.getSimpleValueType(); 6591 MVT XLenVT = Subtarget.getXLenVT(); 6592 MVT ContainerVT = getContainerForFixedLengthVector(VT); 6593 6594 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget); 6595 6596 bool IsMaskOp = VT.getVectorElementType() == MVT::i1; 6597 SDValue IntID = DAG.getTargetConstant( 6598 IsMaskOp ? 
Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
6599 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
6600 if (!IsMaskOp)
6601 Ops.push_back(DAG.getUNDEF(ContainerVT));
6602 Ops.push_back(Load->getBasePtr());
6603 Ops.push_back(VL);
6604 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6605 SDValue NewLoad =
6606 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
6607 Load->getMemoryVT(), Load->getMemOperand());
6608
6609 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
6610 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
6611 }
6612
6613 SDValue
6614 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
6615 SelectionDAG &DAG) const {
6616 SDLoc DL(Op);
6617 auto *Store = cast<StoreSDNode>(Op);
6618
6619 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6620 Store->getMemoryVT(),
6621 *Store->getMemOperand()) &&
6622 "Expecting a correctly-aligned store");
6623
6624 SDValue StoreVal = Store->getValue();
6625 MVT VT = StoreVal.getSimpleValueType();
6626 MVT XLenVT = Subtarget.getXLenVT();
6627
6628 // If the size is less than a byte, we need to pad with zeros to make a byte.
6629 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
6630 VT = MVT::v8i1;
6631 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
6632 DAG.getConstant(0, DL, VT), StoreVal,
6633 DAG.getIntPtrConstant(0, DL));
6634 }
6635
6636 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6637
6638 SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
6639
6640 SDValue NewValue =
6641 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
6642
6643 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
6644 SDValue IntID = DAG.getTargetConstant(
6645 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
6646 return DAG.getMemIntrinsicNode(
6647 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
6648 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
6649 Store->getMemoryVT(), Store->getMemOperand());
6650 }
6651
6652 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
6653 SelectionDAG &DAG) const {
6654 SDLoc DL(Op);
6655 MVT VT = Op.getSimpleValueType();
6656
6657 const auto *MemSD = cast<MemSDNode>(Op);
6658 EVT MemVT = MemSD->getMemoryVT();
6659 MachineMemOperand *MMO = MemSD->getMemOperand();
6660 SDValue Chain = MemSD->getChain();
6661 SDValue BasePtr = MemSD->getBasePtr();
6662
6663 SDValue Mask, PassThru, VL;
6664 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
6665 Mask = VPLoad->getMask();
6666 PassThru = DAG.getUNDEF(VT);
6667 VL = VPLoad->getVectorLength();
6668 } else {
6669 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
6670 Mask = MLoad->getMask();
6671 PassThru = MLoad->getPassThru();
6672 }
6673
6674 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
6675
6676 MVT XLenVT = Subtarget.getXLenVT();
6677
6678 MVT ContainerVT = VT;
6679 if (VT.isFixedLengthVector()) {
6680 ContainerVT = getContainerForFixedLengthVector(VT);
6681 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
6682 if (!IsUnmasked) {
6683 MVT MaskVT = getMaskTypeFor(ContainerVT);
6684 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6685 }
6686 }
6687
6688 if (!VL)
6689 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6690
6691 unsigned IntID =
6692 IsUnmasked ?
Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; 6693 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 6694 if (IsUnmasked) 6695 Ops.push_back(DAG.getUNDEF(ContainerVT)); 6696 else 6697 Ops.push_back(PassThru); 6698 Ops.push_back(BasePtr); 6699 if (!IsUnmasked) 6700 Ops.push_back(Mask); 6701 Ops.push_back(VL); 6702 if (!IsUnmasked) 6703 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 6704 6705 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 6706 6707 SDValue Result = 6708 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 6709 Chain = Result.getValue(1); 6710 6711 if (VT.isFixedLengthVector()) 6712 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 6713 6714 return DAG.getMergeValues({Result, Chain}, DL); 6715 } 6716 6717 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, 6718 SelectionDAG &DAG) const { 6719 SDLoc DL(Op); 6720 6721 const auto *MemSD = cast<MemSDNode>(Op); 6722 EVT MemVT = MemSD->getMemoryVT(); 6723 MachineMemOperand *MMO = MemSD->getMemOperand(); 6724 SDValue Chain = MemSD->getChain(); 6725 SDValue BasePtr = MemSD->getBasePtr(); 6726 SDValue Val, Mask, VL; 6727 6728 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { 6729 Val = VPStore->getValue(); 6730 Mask = VPStore->getMask(); 6731 VL = VPStore->getVectorLength(); 6732 } else { 6733 const auto *MStore = cast<MaskedStoreSDNode>(Op); 6734 Val = MStore->getValue(); 6735 Mask = MStore->getMask(); 6736 } 6737 6738 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 6739 6740 MVT VT = Val.getSimpleValueType(); 6741 MVT XLenVT = Subtarget.getXLenVT(); 6742 6743 MVT ContainerVT = VT; 6744 if (VT.isFixedLengthVector()) { 6745 ContainerVT = getContainerForFixedLengthVector(VT); 6746 6747 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 6748 if (!IsUnmasked) { 6749 MVT MaskVT = getMaskTypeFor(ContainerVT); 6750 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 6751 } 6752 } 6753 6754 if (!VL) 6755 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 6756 6757 unsigned IntID = 6758 IsUnmasked ? 
Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; 6759 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 6760 Ops.push_back(Val); 6761 Ops.push_back(BasePtr); 6762 if (!IsUnmasked) 6763 Ops.push_back(Mask); 6764 Ops.push_back(VL); 6765 6766 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 6767 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 6768 } 6769 6770 SDValue 6771 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 6772 SelectionDAG &DAG) const { 6773 MVT InVT = Op.getOperand(0).getSimpleValueType(); 6774 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 6775 6776 MVT VT = Op.getSimpleValueType(); 6777 6778 SDValue Op1 = 6779 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 6780 SDValue Op2 = 6781 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 6782 6783 SDLoc DL(Op); 6784 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL, 6785 DAG, Subtarget); 6786 MVT MaskVT = getMaskTypeFor(ContainerVT); 6787 6788 SDValue Cmp = 6789 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, 6790 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL}); 6791 6792 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 6793 } 6794 6795 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( 6796 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { 6797 MVT VT = Op.getSimpleValueType(); 6798 6799 if (VT.getVectorElementType() == MVT::i1) 6800 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMergeOp*/ false, 6801 /*HasMask*/ false); 6802 6803 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMergeOp*/ true); 6804 } 6805 6806 SDValue 6807 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op, 6808 SelectionDAG &DAG) const { 6809 unsigned Opc; 6810 switch (Op.getOpcode()) { 6811 default: llvm_unreachable("Unexpected opcode!"); 6812 case ISD::SHL: Opc = RISCVISD::SHL_VL; break; 6813 case ISD::SRA: Opc = RISCVISD::SRA_VL; break; 6814 case ISD::SRL: Opc = RISCVISD::SRL_VL; break; 6815 } 6816 6817 return lowerToScalableOp(Op, DAG, Opc, /*HasMergeOp*/ true); 6818 } 6819 6820 // Lower vector ABS to smax(X, sub(0, X)). 
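// The sequence below materializes a zero splat, computes 0 - X with SUB_VL,
// and takes the element-wise SMAX_VL of X and -X. For example (illustrative),
// abs of <4 x i32> <1, -2, 3, -4> becomes smax(<1, -2, 3, -4>, <-1, 2, -3, 4>)
// = <1, 2, 3, 4>.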
6821 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 6822 SDLoc DL(Op); 6823 MVT VT = Op.getSimpleValueType(); 6824 SDValue X = Op.getOperand(0); 6825 6826 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) && 6827 "Unexpected type for ISD::ABS"); 6828 6829 MVT ContainerVT = VT; 6830 if (VT.isFixedLengthVector()) { 6831 ContainerVT = getContainerForFixedLengthVector(VT); 6832 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 6833 } 6834 6835 SDValue Mask, VL; 6836 if (Op->getOpcode() == ISD::VP_ABS) { 6837 Mask = Op->getOperand(1); 6838 VL = Op->getOperand(2); 6839 } else 6840 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 6841 6842 SDValue SplatZero = DAG.getNode( 6843 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), 6844 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); 6845 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, 6846 DAG.getUNDEF(ContainerVT), Mask, VL); 6847 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, 6848 DAG.getUNDEF(ContainerVT), Mask, VL); 6849 6850 if (VT.isFixedLengthVector()) 6851 Max = convertFromScalableVector(VT, Max, DAG, Subtarget); 6852 return Max; 6853 } 6854 6855 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( 6856 SDValue Op, SelectionDAG &DAG) const { 6857 SDLoc DL(Op); 6858 MVT VT = Op.getSimpleValueType(); 6859 SDValue Mag = Op.getOperand(0); 6860 SDValue Sign = Op.getOperand(1); 6861 assert(Mag.getValueType() == Sign.getValueType() && 6862 "Can only handle COPYSIGN with matching types."); 6863 6864 MVT ContainerVT = getContainerForFixedLengthVector(VT); 6865 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); 6866 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); 6867 6868 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 6869 6870 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, 6871 Sign, DAG.getUNDEF(ContainerVT), Mask, VL); 6872 6873 return convertFromScalableVector(VT, CopySign, DAG, Subtarget); 6874 } 6875 6876 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 6877 SDValue Op, SelectionDAG &DAG) const { 6878 MVT VT = Op.getSimpleValueType(); 6879 MVT ContainerVT = getContainerForFixedLengthVector(VT); 6880 6881 MVT I1ContainerVT = 6882 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 6883 6884 SDValue CC = 6885 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 6886 SDValue Op1 = 6887 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 6888 SDValue Op2 = 6889 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 6890 6891 SDLoc DL(Op); 6892 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 6893 6894 SDValue Select = 6895 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL); 6896 6897 return convertFromScalableVector(VT, Select, DAG, Subtarget); 6898 } 6899 6900 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, 6901 unsigned NewOpc, bool HasMergeOp, 6902 bool HasMask) const { 6903 MVT VT = Op.getSimpleValueType(); 6904 MVT ContainerVT = getContainerForFixedLengthVector(VT); 6905 6906 // Create list of operands by converting existing ones to scalable types. 6907 SmallVector<SDValue, 6> Ops; 6908 for (const SDValue &V : Op->op_values()) { 6909 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 6910 6911 // Pass through non-vector operands. 
6912 if (!V.getValueType().isVector()) { 6913 Ops.push_back(V); 6914 continue; 6915 } 6916 6917 // "cast" fixed length vector to a scalable vector. 6918 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 6919 "Only fixed length vectors are supported!"); 6920 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 6921 } 6922 6923 SDLoc DL(Op); 6924 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 6925 if (HasMergeOp) 6926 Ops.push_back(DAG.getUNDEF(ContainerVT)); 6927 if (HasMask) 6928 Ops.push_back(Mask); 6929 Ops.push_back(VL); 6930 6931 SDValue ScalableRes = 6932 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags()); 6933 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 6934 } 6935 6936 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 6937 // * Operands of each node are assumed to be in the same order. 6938 // * The EVL operand is promoted from i32 to i64 on RV64. 6939 // * Fixed-length vectors are converted to their scalable-vector container 6940 // types. 6941 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, 6942 unsigned RISCVISDOpc, 6943 bool HasMergeOp) const { 6944 SDLoc DL(Op); 6945 MVT VT = Op.getSimpleValueType(); 6946 SmallVector<SDValue, 4> Ops; 6947 6948 MVT ContainerVT = VT; 6949 if (VT.isFixedLengthVector()) 6950 ContainerVT = getContainerForFixedLengthVector(VT); 6951 6952 for (const auto &OpIdx : enumerate(Op->ops())) { 6953 SDValue V = OpIdx.value(); 6954 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 6955 // Add dummy merge value before the mask. 6956 if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index()) 6957 Ops.push_back(DAG.getUNDEF(ContainerVT)); 6958 // Pass through operands which aren't fixed-length vectors. 6959 if (!V.getValueType().isFixedLengthVector()) { 6960 Ops.push_back(V); 6961 continue; 6962 } 6963 // "cast" fixed length vector to a scalable vector. 6964 MVT OpVT = V.getSimpleValueType(); 6965 MVT ContainerVT = getContainerForFixedLengthVector(OpVT); 6966 assert(useRVVForFixedLengthVectorVT(OpVT) && 6967 "Only fixed length vectors are supported!"); 6968 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 6969 } 6970 6971 if (!VT.isFixedLengthVector()) 6972 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags()); 6973 6974 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags()); 6975 6976 return convertFromScalableVector(VT, VPOp, DAG, Subtarget); 6977 } 6978 6979 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, 6980 SelectionDAG &DAG) const { 6981 SDLoc DL(Op); 6982 MVT VT = Op.getSimpleValueType(); 6983 6984 SDValue Src = Op.getOperand(0); 6985 // NOTE: Mask is dropped. 6986 SDValue VL = Op.getOperand(2); 6987 6988 MVT ContainerVT = VT; 6989 if (VT.isFixedLengthVector()) { 6990 ContainerVT = getContainerForFixedLengthVector(VT); 6991 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 6992 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 6993 } 6994 6995 MVT XLenVT = Subtarget.getXLenVT(); 6996 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 6997 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 6998 DAG.getUNDEF(ContainerVT), Zero, VL); 6999 7000 SDValue SplatValue = DAG.getConstant( 7001 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 
1 : -1, DL, XLenVT); 7002 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 7003 DAG.getUNDEF(ContainerVT), SplatValue, VL); 7004 7005 SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src, 7006 Splat, ZeroSplat, VL); 7007 if (!VT.isFixedLengthVector()) 7008 return Result; 7009 return convertFromScalableVector(VT, Result, DAG, Subtarget); 7010 } 7011 7012 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op, 7013 SelectionDAG &DAG) const { 7014 SDLoc DL(Op); 7015 MVT VT = Op.getSimpleValueType(); 7016 7017 SDValue Op1 = Op.getOperand(0); 7018 SDValue Op2 = Op.getOperand(1); 7019 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 7020 // NOTE: Mask is dropped. 7021 SDValue VL = Op.getOperand(4); 7022 7023 MVT ContainerVT = VT; 7024 if (VT.isFixedLengthVector()) { 7025 ContainerVT = getContainerForFixedLengthVector(VT); 7026 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 7027 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 7028 } 7029 7030 SDValue Result; 7031 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 7032 7033 switch (Condition) { 7034 default: 7035 break; 7036 // X != Y --> (X^Y) 7037 case ISD::SETNE: 7038 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 7039 break; 7040 // X == Y --> ~(X^Y) 7041 case ISD::SETEQ: { 7042 SDValue Temp = 7043 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL); 7044 Result = 7045 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL); 7046 break; 7047 } 7048 // X >s Y --> X == 0 & Y == 1 --> ~X & Y 7049 // X <u Y --> X == 0 & Y == 1 --> ~X & Y 7050 case ISD::SETGT: 7051 case ISD::SETULT: { 7052 SDValue Temp = 7053 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 7054 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL); 7055 break; 7056 } 7057 // X <s Y --> X == 1 & Y == 0 --> ~Y & X 7058 // X >u Y --> X == 1 & Y == 0 --> ~Y & X 7059 case ISD::SETLT: 7060 case ISD::SETUGT: { 7061 SDValue Temp = 7062 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 7063 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL); 7064 break; 7065 } 7066 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y 7067 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y 7068 case ISD::SETGE: 7069 case ISD::SETULE: { 7070 SDValue Temp = 7071 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL); 7072 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL); 7073 break; 7074 } 7075 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X 7076 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X 7077 case ISD::SETLE: 7078 case ISD::SETUGE: { 7079 SDValue Temp = 7080 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL); 7081 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL); 7082 break; 7083 } 7084 } 7085 7086 if (!VT.isFixedLengthVector()) 7087 return Result; 7088 return convertFromScalableVector(VT, Result, DAG, Subtarget); 7089 } 7090 7091 // Lower Floating-Point/Integer Type-Convert VP SDNodes 7092 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG, 7093 unsigned RISCVISDOpc) const { 7094 SDLoc DL(Op); 7095 7096 SDValue Src = Op.getOperand(0); 7097 SDValue Mask = Op.getOperand(1); 7098 SDValue VL = Op.getOperand(2); 7099 7100 MVT DstVT = Op.getSimpleValueType(); 7101 MVT SrcVT = Src.getSimpleValueType(); 7102 if (DstVT.isFixedLengthVector()) { 7103 DstVT = 
getContainerForFixedLengthVector(DstVT); 7104 SrcVT = getContainerForFixedLengthVector(SrcVT); 7105 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget); 7106 MVT MaskVT = getMaskTypeFor(DstVT); 7107 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7108 } 7109 7110 unsigned DstEltSize = DstVT.getScalarSizeInBits(); 7111 unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); 7112 7113 SDValue Result; 7114 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion. 7115 if (SrcVT.isInteger()) { 7116 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 7117 7118 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL 7119 ? RISCVISD::VSEXT_VL 7120 : RISCVISD::VZEXT_VL; 7121 7122 // Do we need to do any pre-widening before converting? 7123 if (SrcEltSize == 1) { 7124 MVT IntVT = DstVT.changeVectorElementTypeToInteger(); 7125 MVT XLenVT = Subtarget.getXLenVT(); 7126 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 7127 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 7128 DAG.getUNDEF(IntVT), Zero, VL); 7129 SDValue One = DAG.getConstant( 7130 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); 7131 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, 7132 DAG.getUNDEF(IntVT), One, VL); 7133 Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat, 7134 ZeroSplat, VL); 7135 } else if (DstEltSize > (2 * SrcEltSize)) { 7136 // Widen before converting. 7137 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), 7138 DstVT.getVectorElementCount()); 7139 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL); 7140 } 7141 7142 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 7143 } else { 7144 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 7145 "Wrong input/output vector types"); 7146 7147 // Convert f16 to f32 then convert f32 to i64. 7148 if (DstEltSize > (2 * SrcEltSize)) { 7149 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 7150 MVT InterimFVT = 7151 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 7152 Src = 7153 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL); 7154 } 7155 7156 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL); 7157 } 7158 } else { // Narrowing + Conversion 7159 if (SrcVT.isInteger()) { 7160 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types"); 7161 // First do a narrowing convert to an FP type half the size, then round 7162 // the FP type to a small FP type if needed. 7163 7164 MVT InterimFVT = DstVT; 7165 if (SrcEltSize > (2 * DstEltSize)) { 7166 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!"); 7167 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!"); 7168 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount()); 7169 } 7170 7171 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL); 7172 7173 if (InterimFVT != DstVT) { 7174 Src = Result; 7175 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL); 7176 } 7177 } else { 7178 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() && 7179 "Wrong input/output vector types"); 7180 // First do a narrowing conversion to an integer half the size, then 7181 // truncate if needed. 7182 7183 if (DstEltSize == 1) { 7184 // First convert to the same size integer, then convert to mask using 7185 // setcc. 
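// For example (illustrative): a vp.fptosi from nxv2f32 to nxv2i1 first
// converts to nxv2i32 and then compares that result against zero with a
// SETNE to form the mask.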
7186 assert(SrcEltSize >= 16 && "Unexpected FP type!"); 7187 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize), 7188 DstVT.getVectorElementCount()); 7189 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 7190 7191 // Compare the integer result to 0. The integer should be 0 or 1/-1, 7192 // otherwise the conversion was undefined. 7193 MVT XLenVT = Subtarget.getXLenVT(); 7194 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 7195 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT, 7196 DAG.getUNDEF(InterimIVT), SplatZero, VL); 7197 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, 7198 {Result, SplatZero, DAG.getCondCode(ISD::SETNE), 7199 DAG.getUNDEF(DstVT), Mask, VL}); 7200 } else { 7201 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 7202 DstVT.getVectorElementCount()); 7203 7204 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL); 7205 7206 while (InterimIVT != DstVT) { 7207 SrcEltSize /= 2; 7208 Src = Result; 7209 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2), 7210 DstVT.getVectorElementCount()); 7211 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT, 7212 Src, Mask, VL); 7213 } 7214 } 7215 } 7216 } 7217 7218 MVT VT = Op.getSimpleValueType(); 7219 if (!VT.isFixedLengthVector()) 7220 return Result; 7221 return convertFromScalableVector(VT, Result, DAG, Subtarget); 7222 } 7223 7224 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, 7225 unsigned MaskOpc, 7226 unsigned VecOpc) const { 7227 MVT VT = Op.getSimpleValueType(); 7228 if (VT.getVectorElementType() != MVT::i1) 7229 return lowerVPOp(Op, DAG, VecOpc, true); 7230 7231 // It is safe to drop mask parameter as masked-off elements are undef. 7232 SDValue Op1 = Op->getOperand(0); 7233 SDValue Op2 = Op->getOperand(1); 7234 SDValue VL = Op->getOperand(3); 7235 7236 MVT ContainerVT = VT; 7237 const bool IsFixed = VT.isFixedLengthVector(); 7238 if (IsFixed) { 7239 ContainerVT = getContainerForFixedLengthVector(VT); 7240 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); 7241 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); 7242 } 7243 7244 SDLoc DL(Op); 7245 SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL); 7246 if (!IsFixed) 7247 return Val; 7248 return convertFromScalableVector(VT, Val, DAG, Subtarget); 7249 } 7250 7251 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op, 7252 SelectionDAG &DAG) const { 7253 SDLoc DL(Op); 7254 MVT XLenVT = Subtarget.getXLenVT(); 7255 MVT VT = Op.getSimpleValueType(); 7256 MVT ContainerVT = VT; 7257 if (VT.isFixedLengthVector()) 7258 ContainerVT = getContainerForFixedLengthVector(VT); 7259 7260 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 7261 7262 auto *VPNode = cast<VPStridedLoadSDNode>(Op); 7263 // Check if the mask is known to be all ones 7264 SDValue Mask = VPNode->getMask(); 7265 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 7266 7267 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? 
Intrinsic::riscv_vlse 7268 : Intrinsic::riscv_vlse_mask, 7269 DL, XLenVT); 7270 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, 7271 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(), 7272 VPNode->getStride()}; 7273 if (!IsUnmasked) { 7274 if (VT.isFixedLengthVector()) { 7275 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 7276 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7277 } 7278 Ops.push_back(Mask); 7279 } 7280 Ops.push_back(VPNode->getVectorLength()); 7281 if (!IsUnmasked) { 7282 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 7283 Ops.push_back(Policy); 7284 } 7285 7286 SDValue Result = 7287 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 7288 VPNode->getMemoryVT(), VPNode->getMemOperand()); 7289 SDValue Chain = Result.getValue(1); 7290 7291 if (VT.isFixedLengthVector()) 7292 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 7293 7294 return DAG.getMergeValues({Result, Chain}, DL); 7295 } 7296 7297 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op, 7298 SelectionDAG &DAG) const { 7299 SDLoc DL(Op); 7300 MVT XLenVT = Subtarget.getXLenVT(); 7301 7302 auto *VPNode = cast<VPStridedStoreSDNode>(Op); 7303 SDValue StoreVal = VPNode->getValue(); 7304 MVT VT = StoreVal.getSimpleValueType(); 7305 MVT ContainerVT = VT; 7306 if (VT.isFixedLengthVector()) { 7307 ContainerVT = getContainerForFixedLengthVector(VT); 7308 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 7309 } 7310 7311 // Check if the mask is known to be all ones 7312 SDValue Mask = VPNode->getMask(); 7313 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 7314 7315 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse 7316 : Intrinsic::riscv_vsse_mask, 7317 DL, XLenVT); 7318 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal, 7319 VPNode->getBasePtr(), VPNode->getStride()}; 7320 if (!IsUnmasked) { 7321 if (VT.isFixedLengthVector()) { 7322 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1); 7323 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7324 } 7325 Ops.push_back(Mask); 7326 } 7327 Ops.push_back(VPNode->getVectorLength()); 7328 7329 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(), 7330 Ops, VPNode->getMemoryVT(), 7331 VPNode->getMemOperand()); 7332 } 7333 7334 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be 7335 // matched to a RVV indexed load. The RVV indexed load instructions only 7336 // support the "unsigned unscaled" addressing mode; indices are implicitly 7337 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 7338 // signed or scaled indexing is extended to the XLEN value type and scaled 7339 // accordingly. 7340 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, 7341 SelectionDAG &DAG) const { 7342 SDLoc DL(Op); 7343 MVT VT = Op.getSimpleValueType(); 7344 7345 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 7346 EVT MemVT = MemSD->getMemoryVT(); 7347 MachineMemOperand *MMO = MemSD->getMemOperand(); 7348 SDValue Chain = MemSD->getChain(); 7349 SDValue BasePtr = MemSD->getBasePtr(); 7350 7351 ISD::LoadExtType LoadExtType; 7352 SDValue Index, Mask, PassThru, VL; 7353 7354 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) { 7355 Index = VPGN->getIndex(); 7356 Mask = VPGN->getMask(); 7357 PassThru = DAG.getUNDEF(VT); 7358 VL = VPGN->getVectorLength(); 7359 // VP doesn't support extending loads. 
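// Unlike MGATHER, which carries an extension type (rejected by the assert
// below), VP_GATHER is always a non-extending load.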
7360 LoadExtType = ISD::NON_EXTLOAD; 7361 } else { 7362 // Else it must be a MGATHER. 7363 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); 7364 Index = MGN->getIndex(); 7365 Mask = MGN->getMask(); 7366 PassThru = MGN->getPassThru(); 7367 LoadExtType = MGN->getExtensionType(); 7368 } 7369 7370 MVT IndexVT = Index.getSimpleValueType(); 7371 MVT XLenVT = Subtarget.getXLenVT(); 7372 7373 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 7374 "Unexpected VTs!"); 7375 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 7376 // Targets have to explicitly opt-in for extending vector loads. 7377 assert(LoadExtType == ISD::NON_EXTLOAD && 7378 "Unexpected extending MGATHER/VP_GATHER"); 7379 (void)LoadExtType; 7380 7381 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 7382 // the selection of the masked intrinsics doesn't do this for us. 7383 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 7384 7385 MVT ContainerVT = VT; 7386 if (VT.isFixedLengthVector()) { 7387 ContainerVT = getContainerForFixedLengthVector(VT); 7388 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 7389 ContainerVT.getVectorElementCount()); 7390 7391 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 7392 7393 if (!IsUnmasked) { 7394 MVT MaskVT = getMaskTypeFor(ContainerVT); 7395 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7396 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 7397 } 7398 } 7399 7400 if (!VL) 7401 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 7402 7403 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 7404 IndexVT = IndexVT.changeVectorElementType(XLenVT); 7405 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), 7406 VL); 7407 Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, 7408 TrueMask, VL); 7409 } 7410 7411 unsigned IntID = 7412 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; 7413 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 7414 if (IsUnmasked) 7415 Ops.push_back(DAG.getUNDEF(ContainerVT)); 7416 else 7417 Ops.push_back(PassThru); 7418 Ops.push_back(BasePtr); 7419 Ops.push_back(Index); 7420 if (!IsUnmasked) 7421 Ops.push_back(Mask); 7422 Ops.push_back(VL); 7423 if (!IsUnmasked) 7424 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 7425 7426 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 7427 SDValue Result = 7428 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 7429 Chain = Result.getValue(1); 7430 7431 if (VT.isFixedLengthVector()) 7432 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 7433 7434 return DAG.getMergeValues({Result, Chain}, DL); 7435 } 7436 7437 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be 7438 // matched to a RVV indexed store. The RVV indexed store instructions only 7439 // support the "unsigned unscaled" addressing mode; indices are implicitly 7440 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 7441 // signed or scaled indexing is extended to the XLEN value type and scaled 7442 // accordingly. 
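// A small illustrative example of the addressing mode described above: since
// the indices are interpreted as unscaled byte offsets, scattering to (or
// gathering from) the i32 elements a[0], a[1], a[2] uses indices {0, 4, 8},
// not {0, 1, 2}.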
7443 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, 7444 SelectionDAG &DAG) const { 7445 SDLoc DL(Op); 7446 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 7447 EVT MemVT = MemSD->getMemoryVT(); 7448 MachineMemOperand *MMO = MemSD->getMemOperand(); 7449 SDValue Chain = MemSD->getChain(); 7450 SDValue BasePtr = MemSD->getBasePtr(); 7451 7452 bool IsTruncatingStore = false; 7453 SDValue Index, Mask, Val, VL; 7454 7455 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) { 7456 Index = VPSN->getIndex(); 7457 Mask = VPSN->getMask(); 7458 Val = VPSN->getValue(); 7459 VL = VPSN->getVectorLength(); 7460 // VP doesn't support truncating stores. 7461 IsTruncatingStore = false; 7462 } else { 7463 // Else it must be a MSCATTER. 7464 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 7465 Index = MSN->getIndex(); 7466 Mask = MSN->getMask(); 7467 Val = MSN->getValue(); 7468 IsTruncatingStore = MSN->isTruncatingStore(); 7469 } 7470 7471 MVT VT = Val.getSimpleValueType(); 7472 MVT IndexVT = Index.getSimpleValueType(); 7473 MVT XLenVT = Subtarget.getXLenVT(); 7474 7475 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 7476 "Unexpected VTs!"); 7477 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 7478 // Targets have to explicitly opt-in for extending vector loads and 7479 // truncating vector stores. 7480 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER"); 7481 (void)IsTruncatingStore; 7482 7483 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 7484 // the selection of the masked intrinsics doesn't do this for us. 7485 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 7486 7487 MVT ContainerVT = VT; 7488 if (VT.isFixedLengthVector()) { 7489 ContainerVT = getContainerForFixedLengthVector(VT); 7490 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 7491 ContainerVT.getVectorElementCount()); 7492 7493 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 7494 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 7495 7496 if (!IsUnmasked) { 7497 MVT MaskVT = getMaskTypeFor(ContainerVT); 7498 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 7499 } 7500 } 7501 7502 if (!VL) 7503 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 7504 7505 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) { 7506 IndexVT = IndexVT.changeVectorElementType(XLenVT); 7507 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(), 7508 VL); 7509 Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index, 7510 TrueMask, VL); 7511 } 7512 7513 unsigned IntID = 7514 IsUnmasked ? 
Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 7515 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 7516 Ops.push_back(Val); 7517 Ops.push_back(BasePtr); 7518 Ops.push_back(Index); 7519 if (!IsUnmasked) 7520 Ops.push_back(Mask); 7521 Ops.push_back(VL); 7522 7523 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 7524 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 7525 } 7526 7527 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, 7528 SelectionDAG &DAG) const { 7529 const MVT XLenVT = Subtarget.getXLenVT(); 7530 SDLoc DL(Op); 7531 SDValue Chain = Op->getOperand(0); 7532 SDValue SysRegNo = DAG.getTargetConstant( 7533 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 7534 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); 7535 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo); 7536 7537 // Encoding used for rounding mode in RISCV differs from that used in 7538 // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a 7539 // table, which consists of a sequence of 4-bit fields, each representing 7540 // corresponding FLT_ROUNDS mode. 7541 static const int Table = 7542 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | 7543 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | 7544 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | 7545 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | 7546 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); 7547 7548 SDValue Shift = 7549 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT)); 7550 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 7551 DAG.getConstant(Table, DL, XLenVT), Shift); 7552 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 7553 DAG.getConstant(7, DL, XLenVT)); 7554 7555 return DAG.getMergeValues({Masked, Chain}, DL); 7556 } 7557 7558 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, 7559 SelectionDAG &DAG) const { 7560 const MVT XLenVT = Subtarget.getXLenVT(); 7561 SDLoc DL(Op); 7562 SDValue Chain = Op->getOperand(0); 7563 SDValue RMValue = Op->getOperand(1); 7564 SDValue SysRegNo = DAG.getTargetConstant( 7565 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 7566 7567 // Encoding used for rounding mode in RISCV differs from that used in 7568 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in 7569 // a table, which consists of a sequence of 4-bit fields, each representing 7570 // corresponding RISCV mode. 
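  // A rough worked example, assuming the standard frm field encodings RNE=0,
  // RTZ=1, RDN=2, RUP=3, RMM=4 and RoundingMode values TowardZero=0,
  // NearestTiesToEven=1, TowardPositive=2, TowardNegative=3,
  // NearestTiesToAway=4: the table below then evaluates to 0x42301, and for
  // RMValue == int(RoundingMode::TowardNegative) == 3 the lookup computes
  //   unsigned Mode = (0x42301 >> (3 << 2)) & 7; // == 2 == RISCVFPRndMode::RDN
  // (With these encodings the mapping is its own inverse, so the GET_ROUNDING
  // table above happens to be the same constant.)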
  static const unsigned Table =
      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));

  SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
                              DAG.getConstant(2, DL, XLenVT));
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
                                DAG.getConstant(Table, DL, XLenVT), Shift);
  RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
                        DAG.getConstant(0x7, DL, XLenVT));
  return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
                     RMValue);
}

SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isRISCV64 = Subtarget.is64Bit();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  case ISD::ROTL:
    return RISCVISD::ROLW;
  case ISD::ROTR:
    return RISCVISD::RORW;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLLW/DIVUW/.../*W later on because the fact that the operation was
// originally of type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires that we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics to reduce the number of sign-extension instructions.
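// For example (illustrative sketch), an i32 add on RV64 becomes
//   (trunc (sext_inreg (add (any_extend a), (any_extend b)), i32)),
// which instruction selection can then match to ADDW, so users that need the
// value sign extended do not require a separate sext.w.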
7642 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 7643 SDLoc DL(N); 7644 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 7645 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 7646 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 7647 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 7648 DAG.getValueType(MVT::i32)); 7649 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 7650 } 7651 7652 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 7653 SmallVectorImpl<SDValue> &Results, 7654 SelectionDAG &DAG) const { 7655 SDLoc DL(N); 7656 switch (N->getOpcode()) { 7657 default: 7658 llvm_unreachable("Don't know how to custom type legalize this operation!"); 7659 case ISD::STRICT_FP_TO_SINT: 7660 case ISD::STRICT_FP_TO_UINT: 7661 case ISD::FP_TO_SINT: 7662 case ISD::FP_TO_UINT: { 7663 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7664 "Unexpected custom legalisation"); 7665 bool IsStrict = N->isStrictFPOpcode(); 7666 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || 7667 N->getOpcode() == ISD::STRICT_FP_TO_SINT; 7668 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 7669 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 7670 TargetLowering::TypeSoftenFloat) { 7671 if (!isTypeLegal(Op0.getValueType())) 7672 return; 7673 if (IsStrict) { 7674 SDValue Chain = N->getOperand(0); 7675 // In absense of Zfh, promote f16 to f32, then convert. 7676 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) { 7677 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other}, 7678 {Chain, Op0}); 7679 Chain = Op0.getValue(1); 7680 } 7681 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64 7682 : RISCVISD::STRICT_FCVT_WU_RV64; 7683 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); 7684 SDValue Res = DAG.getNode( 7685 Opc, DL, VTs, Chain, Op0, 7686 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); 7687 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 7688 Results.push_back(Res.getValue(1)); 7689 return; 7690 } 7691 // In absense of Zfh, promote f16 to f32, then convert. 7692 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 7693 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0); 7694 7695 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 7696 SDValue Res = 7697 DAG.getNode(Opc, DL, MVT::i64, Op0, 7698 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64)); 7699 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 7700 return; 7701 } 7702 // If the FP type needs to be softened, emit a library call using the 'si' 7703 // version. If we left it to default legalization we'd end up with 'di'. If 7704 // the FP type doesn't need to be softened just let generic type 7705 // legalization promote the result type. 7706 RTLIB::Libcall LC; 7707 if (IsSigned) 7708 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 7709 else 7710 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 7711 MakeLibCallOptions CallOptions; 7712 EVT OpVT = Op0.getValueType(); 7713 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 7714 SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); 7715 SDValue Result; 7716 std::tie(Result, Chain) = 7717 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 7718 Results.push_back(Result); 7719 if (IsStrict) 7720 Results.push_back(Chain); 7721 break; 7722 } 7723 case ISD::READCYCLECOUNTER: { 7724 assert(!Subtarget.is64Bit() && 7725 "READCYCLECOUNTER only has custom type legalization on riscv32"); 7726 7727 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 7728 SDValue RCW = 7729 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 7730 7731 Results.push_back( 7732 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 7733 Results.push_back(RCW.getValue(2)); 7734 break; 7735 } 7736 case ISD::LOAD: { 7737 if (!ISD::isNON_EXTLoad(N)) 7738 return; 7739 7740 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the 7741 // sext_inreg we emit for ADD/SUB/MUL/SLLI. 7742 LoadSDNode *Ld = cast<LoadSDNode>(N); 7743 7744 SDLoc dl(N); 7745 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), 7746 Ld->getBasePtr(), Ld->getMemoryVT(), 7747 Ld->getMemOperand()); 7748 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res)); 7749 Results.push_back(Res.getValue(1)); 7750 return; 7751 } 7752 case ISD::MUL: { 7753 unsigned Size = N->getSimpleValueType(0).getSizeInBits(); 7754 unsigned XLen = Subtarget.getXLen(); 7755 // This multiply needs to be expanded, try to use MULHSU+MUL if possible. 7756 if (Size > XLen) { 7757 assert(Size == (XLen * 2) && "Unexpected custom legalisation"); 7758 SDValue LHS = N->getOperand(0); 7759 SDValue RHS = N->getOperand(1); 7760 APInt HighMask = APInt::getHighBitsSet(Size, XLen); 7761 7762 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); 7763 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); 7764 // We need exactly one side to be unsigned. 7765 if (LHSIsU == RHSIsU) 7766 return; 7767 7768 auto MakeMULPair = [&](SDValue S, SDValue U) { 7769 MVT XLenVT = Subtarget.getXLenVT(); 7770 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); 7771 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); 7772 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); 7773 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); 7774 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); 7775 }; 7776 7777 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; 7778 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; 7779 7780 // The other operand should be signed, but still prefer MULH when 7781 // possible. 7782 if (RHSIsU && LHSIsS && !RHSIsS) 7783 Results.push_back(MakeMULPair(LHS, RHS)); 7784 else if (LHSIsU && RHSIsS && !LHSIsS) 7785 Results.push_back(MakeMULPair(RHS, LHS)); 7786 7787 return; 7788 } 7789 [[fallthrough]]; 7790 } 7791 case ISD::ADD: 7792 case ISD::SUB: 7793 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7794 "Unexpected custom legalisation"); 7795 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 7796 break; 7797 case ISD::SHL: 7798 case ISD::SRA: 7799 case ISD::SRL: 7800 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7801 "Unexpected custom legalisation"); 7802 if (N->getOperand(1).getOpcode() != ISD::Constant) { 7803 // If we can use a BSET instruction, allow default promotion to apply. 7804 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() && 7805 isOneConstant(N->getOperand(0))) 7806 break; 7807 Results.push_back(customLegalizeToWOp(N, DAG)); 7808 break; 7809 } 7810 7811 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. 
This is 7812 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the 7813 // shift amount. 7814 if (N->getOpcode() == ISD::SHL) { 7815 SDLoc DL(N); 7816 SDValue NewOp0 = 7817 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 7818 SDValue NewOp1 = 7819 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); 7820 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); 7821 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 7822 DAG.getValueType(MVT::i32)); 7823 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 7824 } 7825 7826 break; 7827 case ISD::ROTL: 7828 case ISD::ROTR: 7829 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7830 "Unexpected custom legalisation"); 7831 Results.push_back(customLegalizeToWOp(N, DAG)); 7832 break; 7833 case ISD::CTTZ: 7834 case ISD::CTTZ_ZERO_UNDEF: 7835 case ISD::CTLZ: 7836 case ISD::CTLZ_ZERO_UNDEF: { 7837 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7838 "Unexpected custom legalisation"); 7839 7840 SDValue NewOp0 = 7841 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 7842 bool IsCTZ = 7843 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; 7844 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; 7845 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); 7846 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 7847 return; 7848 } 7849 case ISD::SDIV: 7850 case ISD::UDIV: 7851 case ISD::UREM: { 7852 MVT VT = N->getSimpleValueType(0); 7853 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 7854 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 7855 "Unexpected custom legalisation"); 7856 // Don't promote division/remainder by constant since we should expand those 7857 // to multiply by magic constant. 7858 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 7859 if (N->getOperand(1).getOpcode() == ISD::Constant && 7860 !isIntDivCheap(N->getValueType(0), Attr)) 7861 return; 7862 7863 // If the input is i32, use ANY_EXTEND since the W instructions don't read 7864 // the upper 32 bits. For other types we need to sign or zero extend 7865 // based on the opcode. 7866 unsigned ExtOpc = ISD::ANY_EXTEND; 7867 if (VT != MVT::i32) 7868 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 7869 : ISD::ZERO_EXTEND; 7870 7871 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 7872 break; 7873 } 7874 case ISD::UADDO: 7875 case ISD::USUBO: { 7876 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7877 "Unexpected custom legalisation"); 7878 bool IsAdd = N->getOpcode() == ISD::UADDO; 7879 // Create an ADDW or SUBW. 7880 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 7881 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 7882 SDValue Res = 7883 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 7884 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 7885 DAG.getValueType(MVT::i32)); 7886 7887 SDValue Overflow; 7888 if (IsAdd && isOneConstant(RHS)) { 7889 // Special case uaddo X, 1 overflowed if the addition result is 0. 7890 // The general case (X + C) < C is not necessarily beneficial. Although we 7891 // reduce the live range of X, we may introduce the materialization of 7892 // constant C, especially when the setcc result is used by branch. We have 7893 // no compare with constant and branch instructions. 
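      // For example, for i32 uaddo(X, 1) the only input that overflows is
      // X == 0xffffffff, and in that case the 32-bit sum is 0, so comparing
      // the ADDW result against zero is sufficient and needs no extra
      // constant.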
7894 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, 7895 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ); 7896 } else { 7897 // Sign extend the LHS and perform an unsigned compare with the ADDW 7898 // result. Since the inputs are sign extended from i32, this is equivalent 7899 // to comparing the lower 32 bits. 7900 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 7901 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 7902 IsAdd ? ISD::SETULT : ISD::SETUGT); 7903 } 7904 7905 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 7906 Results.push_back(Overflow); 7907 return; 7908 } 7909 case ISD::UADDSAT: 7910 case ISD::USUBSAT: { 7911 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7912 "Unexpected custom legalisation"); 7913 if (Subtarget.hasStdExtZbb()) { 7914 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using 7915 // sign extend allows overflow of the lower 32 bits to be detected on 7916 // the promoted size. 7917 SDValue LHS = 7918 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 7919 SDValue RHS = 7920 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 7921 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 7922 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 7923 return; 7924 } 7925 7926 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 7927 // promotion for UADDO/USUBO. 7928 Results.push_back(expandAddSubSat(N, DAG)); 7929 return; 7930 } 7931 case ISD::ABS: { 7932 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 7933 "Unexpected custom legalisation"); 7934 7935 if (Subtarget.hasStdExtZbb()) { 7936 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. 7937 // This allows us to remember that the result is sign extended. Expanding 7938 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. 7939 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, 7940 N->getOperand(0)); 7941 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); 7942 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); 7943 return; 7944 } 7945 7946 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y) 7947 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 7948 7949 // Freeze the source so we can increase it's use count. 7950 Src = DAG.getFreeze(Src); 7951 7952 // Copy sign bit to all bits using the sraiw pattern. 7953 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src, 7954 DAG.getValueType(MVT::i32)); 7955 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill, 7956 DAG.getConstant(31, DL, MVT::i64)); 7957 7958 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill); 7959 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill); 7960 7961 // NOTE: The result is only required to be anyextended, but sext is 7962 // consistent with type legalization of sub. 
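    // (A quick check of the expansion above: for negative X, SignFill is all
    // ones, so (X ^ -1) - (-1) == ~X + 1 == -X; for non-negative X, SignFill
    // is zero and X passes through unchanged.)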
7963 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes, 7964 DAG.getValueType(MVT::i32)); 7965 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 7966 return; 7967 } 7968 case ISD::BITCAST: { 7969 EVT VT = N->getValueType(0); 7970 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); 7971 SDValue Op0 = N->getOperand(0); 7972 EVT Op0VT = Op0.getValueType(); 7973 MVT XLenVT = Subtarget.getXLenVT(); 7974 if (VT == MVT::i16 && Op0VT == MVT::f16 && 7975 Subtarget.hasStdExtZfhOrZfhmin()) { 7976 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 7977 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 7978 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && 7979 Subtarget.hasStdExtF()) { 7980 SDValue FPConv = 7981 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 7982 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 7983 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && 7984 isTypeLegal(Op0VT)) { 7985 // Custom-legalize bitcasts from fixed-length vector types to illegal 7986 // scalar types in order to improve codegen. Bitcast the vector to a 7987 // one-element vector type whose element type is the same as the result 7988 // type, and extract the first element. 7989 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 7990 if (isTypeLegal(BVT)) { 7991 SDValue BVec = DAG.getBitcast(BVT, Op0); 7992 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 7993 DAG.getConstant(0, DL, XLenVT))); 7994 } 7995 } 7996 break; 7997 } 7998 case RISCVISD::BREV8: { 7999 MVT VT = N->getSimpleValueType(0); 8000 MVT XLenVT = Subtarget.getXLenVT(); 8001 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) && 8002 "Unexpected custom legalisation"); 8003 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 8004 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); 8005 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp); 8006 // ReplaceNodeResults requires we maintain the same type for the return 8007 // value. 8008 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes)); 8009 break; 8010 } 8011 case ISD::EXTRACT_VECTOR_ELT: { 8012 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 8013 // type is illegal (currently only vXi64 RV32). 8014 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 8015 // transferred to the destination register. We issue two of these from the 8016 // upper- and lower- halves of the SEW-bit vector element, slid down to the 8017 // first element. 8018 SDValue Vec = N->getOperand(0); 8019 SDValue Idx = N->getOperand(1); 8020 8021 // The vector type hasn't been legalized yet so we can't issue target 8022 // specific nodes if it needs legalization. 8023 // FIXME: We would manually legalize if it's important. 8024 if (!isTypeLegal(Vec.getValueType())) 8025 return; 8026 8027 MVT VecVT = Vec.getSimpleValueType(); 8028 8029 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 8030 VecVT.getVectorElementType() == MVT::i64 && 8031 "Unexpected EXTRACT_VECTOR_ELT legalization"); 8032 8033 // If this is a fixed vector, we need to convert it to a scalable vector. 
8034 MVT ContainerVT = VecVT; 8035 if (VecVT.isFixedLengthVector()) { 8036 ContainerVT = getContainerForFixedLengthVector(VecVT); 8037 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 8038 } 8039 8040 MVT XLenVT = Subtarget.getXLenVT(); 8041 8042 // Use a VL of 1 to avoid processing more elements than we need. 8043 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget); 8044 8045 // Unless the index is known to be 0, we must slide the vector down to get 8046 // the desired element into index 0. 8047 if (!isNullConstant(Idx)) { 8048 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, 8049 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 8050 } 8051 8052 // Extract the lower XLEN bits of the correct vector element. 8053 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 8054 8055 // To extract the upper XLEN bits of the vector element, shift the first 8056 // element right by 32 bits and re-extract the lower XLEN bits. 8057 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 8058 DAG.getUNDEF(ContainerVT), 8059 DAG.getConstant(32, DL, XLenVT), VL); 8060 SDValue LShr32 = 8061 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV, 8062 DAG.getUNDEF(ContainerVT), Mask, VL); 8063 8064 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 8065 8066 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 8067 break; 8068 } 8069 case ISD::INTRINSIC_WO_CHAIN: { 8070 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 8071 switch (IntNo) { 8072 default: 8073 llvm_unreachable( 8074 "Don't know how to custom type legalize this intrinsic!"); 8075 case Intrinsic::riscv_orc_b: { 8076 SDValue NewOp = 8077 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 8078 SDValue Res = DAG.getNode(RISCVISD::ORC_B, DL, MVT::i64, NewOp); 8079 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 8080 return; 8081 } 8082 case Intrinsic::riscv_vmv_x_s: { 8083 EVT VT = N->getValueType(0); 8084 MVT XLenVT = Subtarget.getXLenVT(); 8085 if (VT.bitsLT(XLenVT)) { 8086 // Simple case just extract using vmv.x.s and truncate. 8087 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 8088 Subtarget.getXLenVT(), N->getOperand(1)); 8089 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 8090 return; 8091 } 8092 8093 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 8094 "Unexpected custom legalization"); 8095 8096 // We need to do the move in two steps. 8097 SDValue Vec = N->getOperand(1); 8098 MVT VecVT = Vec.getSimpleValueType(); 8099 8100 // First extract the lower XLEN bits of the element. 8101 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 8102 8103 // To extract the upper XLEN bits of the vector element, shift the first 8104 // element right by 32 bits and re-extract the lower XLEN bits. 
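      // Roughly, for an i64 element on RV32 the node sequence below is:
      //   EltLo = VMV_X_S(Vec);  splat 32 with VL=1;  SRL_VL(Vec, splat);
      //   EltHi = VMV_X_S(shifted);
      // and the two i32 halves are glued back together with BUILD_PAIR.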
8105 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget); 8106 8107 SDValue ThirtyTwoV = 8108 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), 8109 DAG.getConstant(32, DL, XLenVT), VL); 8110 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, 8111 DAG.getUNDEF(VecVT), Mask, VL); 8112 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 8113 8114 Results.push_back( 8115 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 8116 break; 8117 } 8118 } 8119 break; 8120 } 8121 case ISD::VECREDUCE_ADD: 8122 case ISD::VECREDUCE_AND: 8123 case ISD::VECREDUCE_OR: 8124 case ISD::VECREDUCE_XOR: 8125 case ISD::VECREDUCE_SMAX: 8126 case ISD::VECREDUCE_UMAX: 8127 case ISD::VECREDUCE_SMIN: 8128 case ISD::VECREDUCE_UMIN: 8129 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 8130 Results.push_back(V); 8131 break; 8132 case ISD::VP_REDUCE_ADD: 8133 case ISD::VP_REDUCE_AND: 8134 case ISD::VP_REDUCE_OR: 8135 case ISD::VP_REDUCE_XOR: 8136 case ISD::VP_REDUCE_SMAX: 8137 case ISD::VP_REDUCE_UMAX: 8138 case ISD::VP_REDUCE_SMIN: 8139 case ISD::VP_REDUCE_UMIN: 8140 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG)) 8141 Results.push_back(V); 8142 break; 8143 case ISD::GET_ROUNDING: { 8144 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other); 8145 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0)); 8146 Results.push_back(Res.getValue(0)); 8147 Results.push_back(Res.getValue(1)); 8148 break; 8149 } 8150 } 8151 } 8152 8153 // Try to fold (<bop> x, (reduction.<bop> vec, start)) 8154 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, 8155 const RISCVSubtarget &Subtarget) { 8156 auto BinOpToRVVReduce = [](unsigned Opc) { 8157 switch (Opc) { 8158 default: 8159 llvm_unreachable("Unhandled binary to transfrom reduction"); 8160 case ISD::ADD: 8161 return RISCVISD::VECREDUCE_ADD_VL; 8162 case ISD::UMAX: 8163 return RISCVISD::VECREDUCE_UMAX_VL; 8164 case ISD::SMAX: 8165 return RISCVISD::VECREDUCE_SMAX_VL; 8166 case ISD::UMIN: 8167 return RISCVISD::VECREDUCE_UMIN_VL; 8168 case ISD::SMIN: 8169 return RISCVISD::VECREDUCE_SMIN_VL; 8170 case ISD::AND: 8171 return RISCVISD::VECREDUCE_AND_VL; 8172 case ISD::OR: 8173 return RISCVISD::VECREDUCE_OR_VL; 8174 case ISD::XOR: 8175 return RISCVISD::VECREDUCE_XOR_VL; 8176 case ISD::FADD: 8177 return RISCVISD::VECREDUCE_FADD_VL; 8178 case ISD::FMAXNUM: 8179 return RISCVISD::VECREDUCE_FMAX_VL; 8180 case ISD::FMINNUM: 8181 return RISCVISD::VECREDUCE_FMIN_VL; 8182 } 8183 }; 8184 8185 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) { 8186 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 8187 isNullConstant(V.getOperand(1)) && 8188 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc); 8189 }; 8190 8191 unsigned Opc = N->getOpcode(); 8192 unsigned ReduceIdx; 8193 if (IsReduction(N->getOperand(0), Opc)) 8194 ReduceIdx = 0; 8195 else if (IsReduction(N->getOperand(1), Opc)) 8196 ReduceIdx = 1; 8197 else 8198 return SDValue(); 8199 8200 // Skip if FADD disallows reassociation but the combiner needs. 
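  // For example, rewriting (fadd X, (vecreduce.fadd of vec starting at the
  // neutral element)) into a reduction that starts at X re-associates the FP
  // additions, so the fold is only done when the reassoc flag is present.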
8201 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation()) 8202 return SDValue(); 8203 8204 SDValue Extract = N->getOperand(ReduceIdx); 8205 SDValue Reduce = Extract.getOperand(0); 8206 if (!Reduce.hasOneUse()) 8207 return SDValue(); 8208 8209 SDValue ScalarV = Reduce.getOperand(2); 8210 EVT ScalarVT = ScalarV.getValueType(); 8211 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR && 8212 ScalarV.getOperand(0)->isUndef()) 8213 ScalarV = ScalarV.getOperand(1); 8214 8215 // Make sure that ScalarV is a splat with VL=1. 8216 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL && 8217 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL && 8218 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) 8219 return SDValue(); 8220 8221 if (!hasNonZeroAVL(ScalarV.getOperand(2))) 8222 return SDValue(); 8223 8224 // Check the scalar of ScalarV is neutral element 8225 // TODO: Deal with value other than neutral element. 8226 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1), 8227 0)) 8228 return SDValue(); 8229 8230 if (!ScalarV.hasOneUse()) 8231 return SDValue(); 8232 8233 SDValue NewStart = N->getOperand(1 - ReduceIdx); 8234 8235 SDLoc DL(N); 8236 SDValue NewScalarV = 8237 lowerScalarInsert(NewStart, ScalarV.getOperand(2), 8238 ScalarV.getSimpleValueType(), DL, DAG, Subtarget); 8239 8240 // If we looked through an INSERT_SUBVECTOR we need to restore it. 8241 if (ScalarVT != ScalarV.getValueType()) 8242 NewScalarV = 8243 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT), 8244 NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT())); 8245 8246 SDValue NewReduce = 8247 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), 8248 Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV, 8249 Reduce.getOperand(3), Reduce.getOperand(4)); 8250 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce, 8251 Extract.getOperand(1)); 8252 } 8253 8254 // Optimize (add (shl x, c0), (shl y, c1)) -> 8255 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 8256 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, 8257 const RISCVSubtarget &Subtarget) { 8258 // Perform this optimization only in the zba extension. 8259 if (!Subtarget.hasStdExtZba()) 8260 return SDValue(); 8261 8262 // Skip for vector types and larger types. 8263 EVT VT = N->getValueType(0); 8264 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 8265 return SDValue(); 8266 8267 // The two operand nodes must be SHL and have no other use. 8268 SDValue N0 = N->getOperand(0); 8269 SDValue N1 = N->getOperand(1); 8270 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || 8271 !N0->hasOneUse() || !N1->hasOneUse()) 8272 return SDValue(); 8273 8274 // Check c0 and c1. 8275 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 8276 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 8277 if (!N0C || !N1C) 8278 return SDValue(); 8279 int64_t C0 = N0C->getSExtValue(); 8280 int64_t C1 = N1C->getSExtValue(); 8281 if (C0 <= 0 || C1 <= 0) 8282 return SDValue(); 8283 8284 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 8285 int64_t Bits = std::min(C0, C1); 8286 int64_t Diff = std::abs(C0 - C1); 8287 if (Diff != 1 && Diff != 2 && Diff != 3) 8288 return SDValue(); 8289 8290 // Build nodes. 8291 SDLoc DL(N); 8292 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); 8293 SDValue NL = (C0 > C1) ? 
N0->getOperand(0) : N1->getOperand(0); 8294 SDValue NA0 = 8295 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT)); 8296 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS); 8297 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); 8298 } 8299 8300 // Combine a constant select operand into its use: 8301 // 8302 // (and (select cond, -1, c), x) 8303 // -> (select cond, x, (and x, c)) [AllOnes=1] 8304 // (or (select cond, 0, c), x) 8305 // -> (select cond, x, (or x, c)) [AllOnes=0] 8306 // (xor (select cond, 0, c), x) 8307 // -> (select cond, x, (xor x, c)) [AllOnes=0] 8308 // (add (select cond, 0, c), x) 8309 // -> (select cond, x, (add x, c)) [AllOnes=0] 8310 // (sub x, (select cond, 0, c)) 8311 // -> (select cond, x, (sub x, c)) [AllOnes=0] 8312 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 8313 SelectionDAG &DAG, bool AllOnes, 8314 const RISCVSubtarget &Subtarget) { 8315 EVT VT = N->getValueType(0); 8316 8317 // Skip vectors. 8318 if (VT.isVector()) 8319 return SDValue(); 8320 8321 if (!Subtarget.hasShortForwardBranchOpt() || 8322 (Slct.getOpcode() != ISD::SELECT && 8323 Slct.getOpcode() != RISCVISD::SELECT_CC) || 8324 !Slct.hasOneUse()) 8325 return SDValue(); 8326 8327 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { 8328 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); 8329 }; 8330 8331 bool SwapSelectOps; 8332 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; 8333 SDValue TrueVal = Slct.getOperand(1 + OpOffset); 8334 SDValue FalseVal = Slct.getOperand(2 + OpOffset); 8335 SDValue NonConstantVal; 8336 if (isZeroOrAllOnes(TrueVal, AllOnes)) { 8337 SwapSelectOps = false; 8338 NonConstantVal = FalseVal; 8339 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { 8340 SwapSelectOps = true; 8341 NonConstantVal = TrueVal; 8342 } else 8343 return SDValue(); 8344 8345 // Slct is now know to be the desired identity constant when CC is true. 8346 TrueVal = OtherOp; 8347 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); 8348 // Unless SwapSelectOps says the condition should be false. 8349 if (SwapSelectOps) 8350 std::swap(TrueVal, FalseVal); 8351 8352 if (Slct.getOpcode() == RISCVISD::SELECT_CC) 8353 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT, 8354 {Slct.getOperand(0), Slct.getOperand(1), 8355 Slct.getOperand(2), TrueVal, FalseVal}); 8356 8357 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 8358 {Slct.getOperand(0), TrueVal, FalseVal}); 8359 } 8360 8361 // Attempt combineSelectAndUse on each operand of a commutative operator N. 8362 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, 8363 bool AllOnes, 8364 const RISCVSubtarget &Subtarget) { 8365 SDValue N0 = N->getOperand(0); 8366 SDValue N1 = N->getOperand(1); 8367 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget)) 8368 return Result; 8369 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget)) 8370 return Result; 8371 return SDValue(); 8372 } 8373 8374 // Transform (add (mul x, c0), c1) -> 8375 // (add (mul (add x, c1/c0), c0), c1%c0). 8376 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case 8377 // that should be excluded is when c0*(c1/c0) is simm12, which will lead 8378 // to an infinite loop in DAGCombine if transformed. 8379 // Or transform (add (mul x, c0), c1) -> 8380 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0), 8381 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. 
A special corner 8382 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will 8383 // lead to an infinite loop in DAGCombine if transformed. 8384 // Or transform (add (mul x, c0), c1) -> 8385 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0), 8386 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner 8387 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will 8388 // lead to an infinite loop in DAGCombine if transformed. 8389 // Or transform (add (mul x, c0), c1) -> 8390 // (mul (add x, c1/c0), c0). 8391 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 8392 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, 8393 const RISCVSubtarget &Subtarget) { 8394 // Skip for vector types and larger types. 8395 EVT VT = N->getValueType(0); 8396 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 8397 return SDValue(); 8398 // The first operand node must be a MUL and has no other use. 8399 SDValue N0 = N->getOperand(0); 8400 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) 8401 return SDValue(); 8402 // Check if c0 and c1 match above conditions. 8403 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 8404 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8405 if (!N0C || !N1C) 8406 return SDValue(); 8407 // If N0C has multiple uses it's possible one of the cases in 8408 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result 8409 // in an infinite loop. 8410 if (!N0C->hasOneUse()) 8411 return SDValue(); 8412 int64_t C0 = N0C->getSExtValue(); 8413 int64_t C1 = N1C->getSExtValue(); 8414 int64_t CA, CB; 8415 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) 8416 return SDValue(); 8417 // Search for proper CA (non-zero) and CB that both are simm12. 8418 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && 8419 !isInt<12>(C0 * (C1 / C0))) { 8420 CA = C1 / C0; 8421 CB = C1 % C0; 8422 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && 8423 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { 8424 CA = C1 / C0 + 1; 8425 CB = C1 % C0 - C0; 8426 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && 8427 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { 8428 CA = C1 / C0 - 1; 8429 CB = C1 % C0 + C0; 8430 } else 8431 return SDValue(); 8432 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). 8433 SDLoc DL(N); 8434 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), 8435 DAG.getConstant(CA, DL, VT)); 8436 SDValue New1 = 8437 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); 8438 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); 8439 } 8440 8441 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, 8442 const RISCVSubtarget &Subtarget) { 8443 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) 8444 return V; 8445 if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) 8446 return V; 8447 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 8448 return V; 8449 // fold (add (select lhs, rhs, cc, 0, y), x) -> 8450 // (select lhs, rhs, cc, x, (add x, y)) 8451 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 8452 } 8453 8454 // Try to turn a sub boolean RHS and constant LHS into an addi. 8455 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) { 8456 SDValue N0 = N->getOperand(0); 8457 SDValue N1 = N->getOperand(1); 8458 EVT VT = N->getValueType(0); 8459 SDLoc DL(N); 8460 8461 // Require a constant LHS. 
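  // For example, (sub 5, (setcc a, b, seteq)) becomes
  // (add (setcc a, b, setne), 4): the setcc produces 0 or 1, so
  // 5 - x == 4 + (1 - x), and inverting the condition supplies the (1 - x).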
8462 auto *N0C = dyn_cast<ConstantSDNode>(N0); 8463 if (!N0C) 8464 return SDValue(); 8465 8466 // All our optimizations involve subtracting 1 from the immediate and forming 8467 // an ADDI. Make sure the new immediate is valid for an ADDI. 8468 APInt ImmValMinus1 = N0C->getAPIntValue() - 1; 8469 if (!ImmValMinus1.isSignedIntN(12)) 8470 return SDValue(); 8471 8472 SDValue NewLHS; 8473 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { 8474 // (sub constant, (setcc x, y, eq/neq)) -> 8475 // (add (setcc x, y, neq/eq), constant - 1) 8476 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get(); 8477 EVT SetCCOpVT = N1.getOperand(0).getValueType(); 8478 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger()) 8479 return SDValue(); 8480 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); 8481 NewLHS = 8482 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal); 8483 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) && 8484 N1.getOperand(0).getOpcode() == ISD::SETCC) { 8485 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1). 8486 // Since setcc returns a bool the xor is equivalent to 1-setcc. 8487 NewLHS = N1.getOperand(0); 8488 } else 8489 return SDValue(); 8490 8491 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT); 8492 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS); 8493 } 8494 8495 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, 8496 const RISCVSubtarget &Subtarget) { 8497 if (SDValue V = combineSubOfBoolean(N, DAG)) 8498 return V; 8499 8500 // fold (sub x, (select lhs, rhs, cc, 0, y)) -> 8501 // (select lhs, rhs, cc, x, (sub x, y)) 8502 SDValue N0 = N->getOperand(0); 8503 SDValue N1 = N->getOperand(1); 8504 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget); 8505 } 8506 8507 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1. 8508 // Legalizing setcc can introduce xors like this. Doing this transform reduces 8509 // the number of xors and may allow the xor to fold into a branch condition. 8510 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) { 8511 SDValue N0 = N->getOperand(0); 8512 SDValue N1 = N->getOperand(1); 8513 bool IsAnd = N->getOpcode() == ISD::AND; 8514 8515 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR) 8516 return SDValue(); 8517 8518 if (!N0.hasOneUse() || !N1.hasOneUse()) 8519 return SDValue(); 8520 8521 SDValue N01 = N0.getOperand(1); 8522 SDValue N11 = N1.getOperand(1); 8523 8524 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into 8525 // (xor X, -1) based on the upper bits of the other operand being 0. If the 8526 // operation is And, allow one of the Xors to use -1. 8527 if (isOneConstant(N01)) { 8528 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11))) 8529 return SDValue(); 8530 } else if (isOneConstant(N11)) { 8531 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1. 8532 if (!(IsAnd && isAllOnesConstant(N01))) 8533 return SDValue(); 8534 } else 8535 return SDValue(); 8536 8537 EVT VT = N->getValueType(0); 8538 8539 SDValue N00 = N0.getOperand(0); 8540 SDValue N10 = N1.getOperand(0); 8541 8542 // The LHS of the xors needs to be 0/1. 8543 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1); 8544 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask)) 8545 return SDValue(); 8546 8547 // Invert the opcode and insert a new xor. 8548 SDLoc DL(N); 8549 unsigned Opc = IsAnd ? 
ISD::OR : ISD::AND; 8550 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10); 8551 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT)); 8552 } 8553 8554 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, 8555 const RISCVSubtarget &Subtarget) { 8556 SDValue N0 = N->getOperand(0); 8557 EVT VT = N->getValueType(0); 8558 8559 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero 8560 // extending X. This is safe since we only need the LSB after the shift and 8561 // shift amounts larger than 31 would produce poison. If we wait until 8562 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 8563 // to use a BEXT instruction. 8564 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 && 8565 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL && 8566 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) { 8567 SDLoc DL(N0); 8568 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 8569 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 8570 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 8571 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl); 8572 } 8573 8574 return SDValue(); 8575 } 8576 8577 namespace { 8578 // Helper class contains information about comparison operation. 8579 // The first two operands of this operation are compared values and the 8580 // last one is the operation. 8581 // Compared values are stored in Ops. 8582 // Comparison operation is stored in CCode. 8583 class CmpOpInfo { 8584 static unsigned constexpr Size = 2u; 8585 8586 // Type for storing operands of compare operation. 8587 using OpsArray = std::array<SDValue, Size>; 8588 OpsArray Ops; 8589 8590 using const_iterator = OpsArray::const_iterator; 8591 const_iterator begin() const { return Ops.begin(); } 8592 const_iterator end() const { return Ops.end(); } 8593 8594 ISD::CondCode CCode; 8595 8596 unsigned CommonPos{Size}; 8597 unsigned DifferPos{Size}; 8598 8599 // Sets CommonPos and DifferPos based on incoming position 8600 // of common operand CPos. 8601 void setPositions(const_iterator CPos) { 8602 assert(CPos != Ops.end() && "Common operand has to be in OpsArray.\n"); 8603 CommonPos = CPos == Ops.begin() ? 0 : 1; 8604 DifferPos = 1 - CommonPos; 8605 assert((DifferPos == 0 || DifferPos == 1) && 8606 "Positions can be only 0 or 1."); 8607 } 8608 8609 // Private constructor of comparison info based on comparison operator. 8610 // It is private because CmpOpInfo only reasonable relative to other 8611 // comparison operator. Therefore, infos about comparison operation 8612 // have to be collected simultaneously via CmpOpInfo::getInfoAbout(). 8613 CmpOpInfo(const SDValue &CmpOp) 8614 : Ops{CmpOp.getOperand(0), CmpOp.getOperand(1)}, 8615 CCode{cast<CondCodeSDNode>(CmpOp.getOperand(2))->get()} {} 8616 8617 // Finds common operand of Op1 and Op2 and finishes filling CmpOpInfos. 8618 // Returns true if common operand is found. Otherwise - false. 
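  // For example, given (setcc a, c, setlt) and (setcc c, b, setgt), the common
  // operand is c; it is recorded at position 1 of the first comparison and at
  // position 0 of the second.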
  static bool establishCorrespondence(CmpOpInfo &Op1, CmpOpInfo &Op2) {
    const auto CommonOpIt1 =
        std::find_first_of(Op1.begin(), Op1.end(), Op2.begin(), Op2.end());
    if (CommonOpIt1 == Op1.end())
      return false;

    const auto CommonOpIt2 = std::find(Op2.begin(), Op2.end(), *CommonOpIt1);
    assert(CommonOpIt2 != Op2.end() &&
           "Cannot find common operand in the second comparison operation.");

    Op1.setPositions(CommonOpIt1);
    Op2.setPositions(CommonOpIt2);

    return true;
  }

public:
  CmpOpInfo(const CmpOpInfo &) = default;
  CmpOpInfo(CmpOpInfo &&) = default;

  SDValue const &operator[](unsigned Pos) const {
    assert(Pos < Size && "Out of range\n");
    return Ops[Pos];
  }

  // Creates info about the comparison operations CmpOp0 and CmpOp1.
  // If there is no common operand, returns std::nullopt. Otherwise, returns
  // correspondence info about the comparison operations.
  static std::optional<std::pair<CmpOpInfo, CmpOpInfo>>
  getInfoAbout(SDValue const &CmpOp0, SDValue const &CmpOp1) {
    CmpOpInfo Op0{CmpOp0};
    CmpOpInfo Op1{CmpOp1};
    if (!establishCorrespondence(Op0, Op1))
      return std::nullopt;
    return std::make_pair(Op0, Op1);
  }

  // Returns the position of the common operand.
  unsigned getCPos() const { return CommonPos; }

  // Returns the position of the differing operand.
  unsigned getDPos() const { return DifferPos; }

  // Returns the common operand.
  SDValue const &getCOp() const { return operator[](CommonPos); }

  // Returns the differing operand.
  SDValue const &getDOp() const { return operator[](DifferPos); }

  // Returns the condition code of the comparison operation.
  ISD::CondCode getCondCode() const { return CCode; }
};
} // namespace

// Verifies the conditions needed to apply the optimization.
// Returns the reference comparison code and the three operands A, B and C.
// Conditions for the optimization:
//   One operand of the comparisons has to be common; it is written to C.
//   The two other operands are different; they are written to A and B.
//   The comparisons have to be similar with respect to the common operand C,
//     e.g. A < C; C > B are similar,
//      but A < C; B > C are not.
//   The reference comparison code is the comparison code once the common
//   operand has been placed on the right-hand side,
//     e.g. C > A will be swapped to A < C.
static std::optional<std::tuple<ISD::CondCode, SDValue, SDValue, SDValue>>
verifyCompareConds(SDNode *N, SelectionDAG &DAG) {
  LLVM_DEBUG(
      dbgs() << "Checking conditions for comparison operation combining.\n";);

  SDValue V0 = N->getOperand(0);
  SDValue V1 = N->getOperand(1);
  assert(V0.getValueType() == V1.getValueType() &&
         "Operations must have the same value type.");

  // Condition 1. The compare operations have to be used only by the logic
  // operation.
  if (!V0.hasOneUse() || !V1.hasOneUse())
    return std::nullopt;

  // Condition 2. The operands have to be comparison operations.
  if (V0.getOpcode() != ISD::SETCC || V1.getOpcode() != ISD::SETCC)
    return std::nullopt;

  // Condition 3.1. Only integer comparisons are handled.
  if (!V0.getOperand(0).getValueType().isInteger())
    return std::nullopt;

  const auto ComparisonInfo = CmpOpInfo::getInfoAbout(V0, V1);
  // Condition 3.2. The comparisons must share a common operand.
8709 if (!ComparisonInfo) 8710 return std::nullopt; 8711 8712 const auto [Op0, Op1] = ComparisonInfo.value(); 8713 8714 LLVM_DEBUG(dbgs() << "Shared operands are on positions: " << Op0.getCPos() 8715 << " and " << Op1.getCPos() << '\n';); 8716 // If common operand at the first position then swap operation to convert to 8717 // strict pattern. Common operand has to be right hand side. 8718 ISD::CondCode RefCond = Op0.getCondCode(); 8719 ISD::CondCode AssistCode = Op1.getCondCode(); 8720 if (!Op0.getCPos()) 8721 RefCond = ISD::getSetCCSwappedOperands(RefCond); 8722 if (!Op1.getCPos()) 8723 AssistCode = ISD::getSetCCSwappedOperands(AssistCode); 8724 LLVM_DEBUG(dbgs() << "Reference condition is: " << RefCond << '\n';); 8725 // If there are different comparison operations then do not perform an 8726 // optimization. a < c; c < b -> will be changed to b > c. 8727 if (RefCond != AssistCode) 8728 return std::nullopt; 8729 8730 // Conditions can be only similar to Less or Greater. (>, >=, <, <=) 8731 // Applying this mask to the operation will determine Less and Greater 8732 // operations. 8733 const unsigned CmpMask = 0b110; 8734 const unsigned MaskedOpcode = CmpMask & RefCond; 8735 // If masking gave 0b110, then this is an operation NE, O or TRUE. 8736 if (MaskedOpcode == CmpMask) 8737 return std::nullopt; 8738 // If masking gave 00000, then this is an operation E, O or FALSE. 8739 if (MaskedOpcode == 0) 8740 return std::nullopt; 8741 // Everything else is similar to Less or Greater. 8742 8743 SDValue A = Op0.getDOp(); 8744 SDValue B = Op1.getDOp(); 8745 SDValue C = Op0.getCOp(); 8746 8747 LLVM_DEBUG( 8748 dbgs() << "The conditions for combining comparisons are satisfied.\n";); 8749 return std::make_tuple(RefCond, A, B, C); 8750 } 8751 8752 static ISD::NodeType getSelectionCode(bool IsUnsigned, bool IsAnd, 8753 bool IsGreaterOp) { 8754 // Codes of selection operation. The first index selects signed or unsigned, 8755 // the second index selects MIN/MAX. 8756 static constexpr ISD::NodeType SelectionCodes[2][2] = { 8757 {ISD::SMIN, ISD::SMAX}, {ISD::UMIN, ISD::UMAX}}; 8758 const bool ChooseSelCode = IsAnd ^ IsGreaterOp; 8759 return SelectionCodes[IsUnsigned][ChooseSelCode]; 8760 } 8761 8762 // Combines two comparison operation and logic operation to one selection 8763 // operation(min, max) and logic operation. Returns new constructed Node if 8764 // conditions for optimization are satisfied. 8765 static SDValue combineCmpOp(SDNode *N, SelectionDAG &DAG, 8766 const RISCVSubtarget &Subtarget) { 8767 if (!Subtarget.hasStdExtZbb()) 8768 return SDValue(); 8769 8770 const unsigned BitOpcode = N->getOpcode(); 8771 assert((BitOpcode == ISD::AND || BitOpcode == ISD::OR) && 8772 "This optimization can be used only with AND/OR operations"); 8773 8774 const auto Props = verifyCompareConds(N, DAG); 8775 // If conditions are invalidated then do not perform an optimization. 
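  // Otherwise, e.g. (and (setcc a, c, setlt), (setcc b, c, setlt)) can be
  // rewritten as (setcc (smax a, b), c, setlt), replacing two compares with a
  // single Zbb max/min followed by one compare.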
8776 if (!Props) 8777 return SDValue(); 8778 8779 const auto [RefOpcode, A, B, C] = Props.value(); 8780 const EVT CmpOpVT = A.getValueType(); 8781 8782 const bool IsGreaterOp = RefOpcode & 0b10; 8783 const bool IsUnsigned = ISD::isUnsignedIntSetCC(RefOpcode); 8784 assert((IsUnsigned || ISD::isSignedIntSetCC(RefOpcode)) && 8785 "Operation neither with signed or unsigned integers."); 8786 8787 const bool IsAnd = BitOpcode == ISD::AND; 8788 const ISD::NodeType PickCode = 8789 getSelectionCode(IsUnsigned, IsAnd, IsGreaterOp); 8790 8791 SDLoc DL(N); 8792 SDValue Pick = DAG.getNode(PickCode, DL, CmpOpVT, A, B); 8793 SDValue Cmp = 8794 DAG.getSetCC(DL, N->getOperand(0).getValueType(), Pick, C, RefOpcode); 8795 8796 return Cmp; 8797 } 8798 8799 static SDValue performANDCombine(SDNode *N, 8800 TargetLowering::DAGCombinerInfo &DCI, 8801 const RISCVSubtarget &Subtarget) { 8802 SelectionDAG &DAG = DCI.DAG; 8803 8804 SDValue N0 = N->getOperand(0); 8805 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero 8806 // extending X. This is safe since we only need the LSB after the shift and 8807 // shift amounts larger than 31 would produce poison. If we wait until 8808 // type legalization, we'll create RISCVISD::SRLW and we can't recover it 8809 // to use a BEXT instruction. 8810 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && 8811 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) && 8812 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) && 8813 N0.hasOneUse()) { 8814 SDLoc DL(N); 8815 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); 8816 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); 8817 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1); 8818 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl, 8819 DAG.getConstant(1, DL, MVT::i64)); 8820 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); 8821 } 8822 8823 if (SDValue V = combineCmpOp(N, DAG, Subtarget)) 8824 return V; 8825 8826 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 8827 return V; 8828 8829 if (DCI.isAfterLegalizeDAG()) 8830 if (SDValue V = combineDeMorganOfBoolean(N, DAG)) 8831 return V; 8832 8833 // fold (and (select lhs, rhs, cc, -1, y), x) -> 8834 // (select lhs, rhs, cc, x, (and x, y)) 8835 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget); 8836 } 8837 8838 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, 8839 const RISCVSubtarget &Subtarget) { 8840 SelectionDAG &DAG = DCI.DAG; 8841 8842 if (SDValue V = combineCmpOp(N, DAG, Subtarget)) 8843 return V; 8844 8845 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 8846 return V; 8847 8848 if (DCI.isAfterLegalizeDAG()) 8849 if (SDValue V = combineDeMorganOfBoolean(N, DAG)) 8850 return V; 8851 8852 // fold (or (select cond, 0, y), x) -> 8853 // (select cond, x, (or x, y)) 8854 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 8855 } 8856 8857 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 8858 const RISCVSubtarget &Subtarget) { 8859 SDValue N0 = N->getOperand(0); 8860 SDValue N1 = N->getOperand(1); 8861 8862 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) 8863 // NOTE: Assumes ROL being legal means ROLW is legal. 
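  // Both forms compute the sign-extended value of ~(1 << (x & 31)), so the
  // xor of the shifted 1 can instead rotate the constant ~1 by x.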
8864 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8865 if (N0.getOpcode() == RISCVISD::SLLW && 8866 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) && 8867 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) { 8868 SDLoc DL(N); 8869 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64, 8870 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); 8871 } 8872 8873 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) 8874 return V; 8875 // fold (xor (select cond, 0, y), x) -> 8876 // (select cond, x, (xor x, y)) 8877 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); 8878 } 8879 8880 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with 8881 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from 8882 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg 8883 // can become a sext.w instead of a shift pair. 8884 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, 8885 const RISCVSubtarget &Subtarget) { 8886 SDValue N0 = N->getOperand(0); 8887 SDValue N1 = N->getOperand(1); 8888 EVT VT = N->getValueType(0); 8889 EVT OpVT = N0.getValueType(); 8890 8891 if (OpVT != MVT::i64 || !Subtarget.is64Bit()) 8892 return SDValue(); 8893 8894 // RHS needs to be a constant. 8895 auto *N1C = dyn_cast<ConstantSDNode>(N1); 8896 if (!N1C) 8897 return SDValue(); 8898 8899 // LHS needs to be (and X, 0xffffffff). 8900 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 8901 !isa<ConstantSDNode>(N0.getOperand(1)) || 8902 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) 8903 return SDValue(); 8904 8905 // Looking for an equality compare. 8906 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); 8907 if (!isIntEqualitySetCC(Cond)) 8908 return SDValue(); 8909 8910 // Don't do this if the sign bit is provably zero, it will be turned back into 8911 // an AND. 8912 APInt SignMask = APInt::getOneBitSet(64, 31); 8913 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask)) 8914 return SDValue(); 8915 8916 const APInt &C1 = N1C->getAPIntValue(); 8917 8918 SDLoc dl(N); 8919 // If the constant is larger than 2^32 - 1 it is impossible for both sides 8920 // to be equal. 8921 if (C1.getActiveBits() > 32) 8922 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); 8923 8924 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, 8925 N0.getOperand(0), DAG.getValueType(MVT::i32)); 8926 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), 8927 dl, OpVT), Cond); 8928 } 8929 8930 static SDValue 8931 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, 8932 const RISCVSubtarget &Subtarget) { 8933 SDValue Src = N->getOperand(0); 8934 EVT VT = N->getValueType(0); 8935 8936 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) 8937 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && 8938 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16)) 8939 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, 8940 Src.getOperand(0)); 8941 8942 return SDValue(); 8943 } 8944 8945 namespace { 8946 // Forward declaration of the structure holding the necessary information to 8947 // apply a combine. 8948 struct CombineResult; 8949 8950 /// Helper class for folding sign/zero extensions. 
8951 /// In particular, this class is used for the following combines: 8952 /// add_vl -> vwadd(u) | vwadd(u)_w 8953 /// sub_vl -> vwsub(u) | vwsub(u)_w 8954 /// mul_vl -> vwmul(u) | vwmul_su 8955 /// 8956 /// An object of this class represents an operand of the operation we want to 8957 /// combine. 8958 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of 8959 /// NodeExtensionHelper for `a` and one for `b`. 8960 /// 8961 /// This class abstracts away how the extension is materialized and 8962 /// how its Mask, VL, number of users affect the combines. 8963 /// 8964 /// In particular: 8965 /// - VWADD_W is conceptually == add(op0, sext(op1)) 8966 /// - VWADDU_W == add(op0, zext(op1)) 8967 /// - VWSUB_W == sub(op0, sext(op1)) 8968 /// - VWSUBU_W == sub(op0, zext(op1)) 8969 /// 8970 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to 8971 /// zext|sext(smaller_value). 8972 struct NodeExtensionHelper { 8973 /// Records if this operand is like being zero extended. 8974 bool SupportsZExt; 8975 /// Records if this operand is like being sign extended. 8976 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For 8977 /// instance, a splat constant (e.g., 3), would support being both sign and 8978 /// zero extended. 8979 bool SupportsSExt; 8980 /// This boolean captures whether we care if this operand would still be 8981 /// around after the folding happens. 8982 bool EnforceOneUse; 8983 /// Records if this operand's mask needs to match the mask of the operation 8984 /// that it will fold into. 8985 bool CheckMask; 8986 /// Value of the Mask for this operand. 8987 /// It may be SDValue(). 8988 SDValue Mask; 8989 /// Value of the vector length operand. 8990 /// It may be SDValue(). 8991 SDValue VL; 8992 /// Original value that this NodeExtensionHelper represents. 8993 SDValue OrigOperand; 8994 8995 /// Get the value feeding the extension or the value itself. 8996 /// E.g., for zext(a), this would return a. 8997 SDValue getSource() const { 8998 switch (OrigOperand.getOpcode()) { 8999 case RISCVISD::VSEXT_VL: 9000 case RISCVISD::VZEXT_VL: 9001 return OrigOperand.getOperand(0); 9002 default: 9003 return OrigOperand; 9004 } 9005 } 9006 9007 /// Check if this instance represents a splat. 9008 bool isSplat() const { 9009 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL; 9010 } 9011 9012 /// Get or create a value that can feed \p Root with the given extension \p 9013 /// SExt. If \p SExt is None, this returns the source of this operand. 9014 /// \see ::getSource(). 9015 SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, 9016 std::optional<bool> SExt) const { 9017 if (!SExt.has_value()) 9018 return OrigOperand; 9019 9020 MVT NarrowVT = getNarrowType(Root); 9021 9022 SDValue Source = getSource(); 9023 if (Source.getValueType() == NarrowVT) 9024 return Source; 9025 9026 unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL; 9027 9028 // If we need an extension, we should be changing the type. 
9029 SDLoc DL(Root); 9030 auto [Mask, VL] = getMaskAndVL(Root); 9031 switch (OrigOperand.getOpcode()) { 9032 case RISCVISD::VSEXT_VL: 9033 case RISCVISD::VZEXT_VL: 9034 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); 9035 case RISCVISD::VMV_V_X_VL: 9036 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, 9037 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL); 9038 default: 9039 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL 9040 // and that operand should already have the right NarrowVT so no 9041 // extension should be required at this point. 9042 llvm_unreachable("Unsupported opcode"); 9043 } 9044 } 9045 9046 /// Helper function to get the narrow type for \p Root. 9047 /// The narrow type is the type of \p Root where we divided the size of each 9048 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>. 9049 /// \pre The size of the type of the elements of Root must be a multiple of 2 9050 /// and be greater than 16. 9051 static MVT getNarrowType(const SDNode *Root) { 9052 MVT VT = Root->getSimpleValueType(0); 9053 9054 // Determine the narrow size. 9055 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 9056 assert(NarrowSize >= 8 && "Trying to extend something we can't represent"); 9057 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize), 9058 VT.getVectorElementCount()); 9059 return NarrowVT; 9060 } 9061 9062 /// Return the opcode required to materialize the folding of the sign 9063 /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for 9064 /// both operands for \p Opcode. 9065 /// Put differently, get the opcode to materialize: 9066 /// - ISExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b) 9067 /// - ISExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b) 9068 /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). 9069 static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { 9070 switch (Opcode) { 9071 case RISCVISD::ADD_VL: 9072 case RISCVISD::VWADD_W_VL: 9073 case RISCVISD::VWADDU_W_VL: 9074 return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; 9075 case RISCVISD::MUL_VL: 9076 return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; 9077 case RISCVISD::SUB_VL: 9078 case RISCVISD::VWSUB_W_VL: 9079 case RISCVISD::VWSUBU_W_VL: 9080 return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL; 9081 default: 9082 llvm_unreachable("Unexpected opcode"); 9083 } 9084 } 9085 9086 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> 9087 /// newOpcode(a, b). 9088 static unsigned getSUOpcode(unsigned Opcode) { 9089 assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); 9090 return RISCVISD::VWMULSU_VL; 9091 } 9092 9093 /// Get the opcode to materialize \p Opcode(a, s|zext(b)) -> 9094 /// newOpcode(a, b). 9095 static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { 9096 switch (Opcode) { 9097 case RISCVISD::ADD_VL: 9098 return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; 9099 case RISCVISD::SUB_VL: 9100 return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; 9101 default: 9102 llvm_unreachable("Unexpected opcode"); 9103 } 9104 } 9105 9106 using CombineToTry = std::function<std::optional<CombineResult>( 9107 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, 9108 const NodeExtensionHelper & /*RHS*/)>; 9109 9110 /// Check if this node needs to be fully folded or extended for all users. 
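  /// When this returns true, the combiner must also fold every other user of
  /// OrigOperand; otherwise the original extension would stay live next to
  /// the widened operation.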
9111 bool needToPromoteOtherUsers() const { return EnforceOneUse; } 9112 9113 /// Helper method to set the various fields of this struct based on the 9114 /// type of \p Root. 9115 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { 9116 SupportsZExt = false; 9117 SupportsSExt = false; 9118 EnforceOneUse = true; 9119 CheckMask = true; 9120 switch (OrigOperand.getOpcode()) { 9121 case RISCVISD::VZEXT_VL: 9122 SupportsZExt = true; 9123 Mask = OrigOperand.getOperand(1); 9124 VL = OrigOperand.getOperand(2); 9125 break; 9126 case RISCVISD::VSEXT_VL: 9127 SupportsSExt = true; 9128 Mask = OrigOperand.getOperand(1); 9129 VL = OrigOperand.getOperand(2); 9130 break; 9131 case RISCVISD::VMV_V_X_VL: { 9132 // Historically, we didn't care about splat values not disappearing during 9133 // combines. 9134 EnforceOneUse = false; 9135 CheckMask = false; 9136 VL = OrigOperand.getOperand(2); 9137 9138 // The operand is a splat of a scalar. 9139 9140 // The pasthru must be undef for tail agnostic. 9141 if (!OrigOperand.getOperand(0).isUndef()) 9142 break; 9143 9144 // Get the scalar value. 9145 SDValue Op = OrigOperand.getOperand(1); 9146 9147 // See if we have enough sign bits or zero bits in the scalar to use a 9148 // widening opcode by splatting to smaller element size. 9149 MVT VT = Root->getSimpleValueType(0); 9150 unsigned EltBits = VT.getScalarSizeInBits(); 9151 unsigned ScalarBits = Op.getValueSizeInBits(); 9152 // Make sure we're getting all element bits from the scalar register. 9153 // FIXME: Support implicit sign extension of vmv.v.x? 9154 if (ScalarBits < EltBits) 9155 break; 9156 9157 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 9158 // If the narrow type cannot be expressed with a legal VMV, 9159 // this is not a valid candidate. 9160 if (NarrowSize < 8) 9161 break; 9162 9163 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize) 9164 SupportsSExt = true; 9165 if (DAG.MaskedValueIsZero(Op, 9166 APInt::getBitsSetFrom(ScalarBits, NarrowSize))) 9167 SupportsZExt = true; 9168 break; 9169 } 9170 default: 9171 break; 9172 } 9173 } 9174 9175 /// Check if \p Root supports any extension folding combines. 9176 static bool isSupportedRoot(const SDNode *Root) { 9177 switch (Root->getOpcode()) { 9178 case RISCVISD::ADD_VL: 9179 case RISCVISD::MUL_VL: 9180 case RISCVISD::VWADD_W_VL: 9181 case RISCVISD::VWADDU_W_VL: 9182 case RISCVISD::SUB_VL: 9183 case RISCVISD::VWSUB_W_VL: 9184 case RISCVISD::VWSUBU_W_VL: 9185 return true; 9186 default: 9187 return false; 9188 } 9189 } 9190 9191 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). 9192 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { 9193 assert(isSupportedRoot(Root) && "Trying to build an helper with an " 9194 "unsupported root"); 9195 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); 9196 OrigOperand = Root->getOperand(OperandIdx); 9197 9198 unsigned Opc = Root->getOpcode(); 9199 switch (Opc) { 9200 // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were 9201 // <ADD|SUB>(LHS, S|ZEXT(RHS)) 9202 case RISCVISD::VWADD_W_VL: 9203 case RISCVISD::VWADDU_W_VL: 9204 case RISCVISD::VWSUB_W_VL: 9205 case RISCVISD::VWSUBU_W_VL: 9206 if (OperandIdx == 1) { 9207 SupportsZExt = 9208 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; 9209 SupportsSExt = !SupportsZExt; 9210 std::tie(Mask, VL) = getMaskAndVL(Root); 9211 CheckMask = true; 9212 // There's no existing extension here, so we don't have to worry about 9213 // making sure it gets removed. 
9214 EnforceOneUse = false; 9215 break; 9216 } 9217 [[fallthrough]]; 9218 default: 9219 fillUpExtensionSupport(Root, DAG); 9220 break; 9221 } 9222 } 9223 9224 /// Check if this operand is compatible with the given vector length \p VL. 9225 bool isVLCompatible(SDValue VL) const { 9226 return this->VL != SDValue() && this->VL == VL; 9227 } 9228 9229 /// Check if this operand is compatible with the given \p Mask. 9230 bool isMaskCompatible(SDValue Mask) const { 9231 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask); 9232 } 9233 9234 /// Helper function to get the Mask and VL from \p Root. 9235 static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { 9236 assert(isSupportedRoot(Root) && "Unexpected root"); 9237 return std::make_pair(Root->getOperand(3), Root->getOperand(4)); 9238 } 9239 9240 /// Check if the Mask and VL of this operand are compatible with \p Root. 9241 bool areVLAndMaskCompatible(const SDNode *Root) const { 9242 auto [Mask, VL] = getMaskAndVL(Root); 9243 return isMaskCompatible(Mask) && isVLCompatible(VL); 9244 } 9245 9246 /// Helper function to check if \p N is commutative with respect to the 9247 /// foldings that are supported by this class. 9248 static bool isCommutative(const SDNode *N) { 9249 switch (N->getOpcode()) { 9250 case RISCVISD::ADD_VL: 9251 case RISCVISD::MUL_VL: 9252 case RISCVISD::VWADD_W_VL: 9253 case RISCVISD::VWADDU_W_VL: 9254 return true; 9255 case RISCVISD::SUB_VL: 9256 case RISCVISD::VWSUB_W_VL: 9257 case RISCVISD::VWSUBU_W_VL: 9258 return false; 9259 default: 9260 llvm_unreachable("Unexpected opcode"); 9261 } 9262 } 9263 9264 /// Get a list of combine to try for folding extensions in \p Root. 9265 /// Note that each returned CombineToTry function doesn't actually modify 9266 /// anything. Instead they produce an optional CombineResult that if not None, 9267 /// need to be materialized for the combine to be applied. 9268 /// \see CombineResult::materialize. 9269 /// If the related CombineToTry function returns std::nullopt, that means the 9270 /// combine didn't match. 9271 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root); 9272 }; 9273 9274 /// Helper structure that holds all the necessary information to materialize a 9275 /// combine that does some extension folding. 9276 struct CombineResult { 9277 /// Opcode to be generated when materializing the combine. 9278 unsigned TargetOpcode; 9279 // No value means no extension is needed. If extension is needed, the value 9280 // indicates if it needs to be sign extended. 9281 std::optional<bool> SExtLHS; 9282 std::optional<bool> SExtRHS; 9283 /// Root of the combine. 9284 SDNode *Root; 9285 /// LHS of the TargetOpcode. 9286 NodeExtensionHelper LHS; 9287 /// RHS of the TargetOpcode. 9288 NodeExtensionHelper RHS; 9289 9290 CombineResult(unsigned TargetOpcode, SDNode *Root, 9291 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS, 9292 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS) 9293 : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS), 9294 Root(Root), LHS(LHS), RHS(RHS) {} 9295 9296 /// Return a value that uses TargetOpcode and that can be used to replace 9297 /// Root. 9298 /// The actual replacement is *not* done in that method. 
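  /// E.g., for a Root add_vl(sext(a), sext(b)) combined with TargetOpcode
  /// VWADD_VL, this builds vwadd_vl(a, b, merge, mask, vl).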
  SDValue materialize(SelectionDAG &DAG) const {
    SDValue Mask, VL, Merge;
    std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
    Merge = Root->getOperand(2);
    return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
                       LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
                       RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
                       Mask, VL);
  }
};

/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
/// \p AllowSExt and \p AllowZExt define which form `ext` can take in this
/// pattern.
///
/// \note If the pattern can match with both zext and sext, the returned
/// CombineResult will feature the zext result.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
                                 const NodeExtensionHelper &RHS, bool AllowSExt,
                                 bool AllowZExt) {
  assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
  if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
    return std::nullopt;
  if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
    return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
                             Root->getOpcode(), /*IsSExt=*/false),
                         Root, LHS, /*SExtLHS=*/false, RHS,
                         /*SExtRHS=*/false);
  if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
    return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
                             Root->getOpcode(), /*IsSExt=*/true),
                         Root, LHS, /*SExtLHS=*/true, RHS,
                         /*SExtRHS=*/true);
  return std::nullopt;
}

/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
/// are zext) and LHS and RHS can be folded into Root.
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
                             const NodeExtensionHelper &RHS) {
  return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
                                          /*AllowZExt=*/true);
}

/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
///
/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
              const NodeExtensionHelper &RHS) {
  if (!RHS.areVLAndMaskCompatible(Root))
    return std::nullopt;

  // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
  // sext/zext?
  // Control this behavior behind an option (AllowSplatInVW_W) for testing
  // purposes.
9367 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W)) 9368 return CombineResult( 9369 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false), 9370 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false); 9371 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W)) 9372 return CombineResult( 9373 NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true), 9374 Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true); 9375 return std::nullopt; 9376 } 9377 9378 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS)) 9379 /// 9380 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 9381 /// can be used to apply the pattern. 9382 static std::optional<CombineResult> 9383 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, 9384 const NodeExtensionHelper &RHS) { 9385 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, 9386 /*AllowZExt=*/false); 9387 } 9388 9389 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) 9390 /// 9391 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 9392 /// can be used to apply the pattern. 9393 static std::optional<CombineResult> 9394 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, 9395 const NodeExtensionHelper &RHS) { 9396 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, 9397 /*AllowZExt=*/true); 9398 } 9399 9400 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) 9401 /// 9402 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that 9403 /// can be used to apply the pattern. 9404 static std::optional<CombineResult> 9405 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, 9406 const NodeExtensionHelper &RHS) { 9407 if (!LHS.SupportsSExt || !RHS.SupportsZExt) 9408 return std::nullopt; 9409 if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) 9410 return std::nullopt; 9411 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), 9412 Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); 9413 } 9414 9415 SmallVector<NodeExtensionHelper::CombineToTry> 9416 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { 9417 SmallVector<CombineToTry> Strategies; 9418 switch (Root->getOpcode()) { 9419 case RISCVISD::ADD_VL: 9420 case RISCVISD::SUB_VL: 9421 // add|sub -> vwadd(u)|vwsub(u) 9422 Strategies.push_back(canFoldToVWWithSameExtension); 9423 // add|sub -> vwadd(u)_w|vwsub(u)_w 9424 Strategies.push_back(canFoldToVW_W); 9425 break; 9426 case RISCVISD::MUL_VL: 9427 // mul -> vwmul(u) 9428 Strategies.push_back(canFoldToVWWithSameExtension); 9429 // mul -> vwmulsu 9430 Strategies.push_back(canFoldToVW_SU); 9431 break; 9432 case RISCVISD::VWADD_W_VL: 9433 case RISCVISD::VWSUB_W_VL: 9434 // vwadd_w|vwsub_w -> vwadd|vwsub 9435 Strategies.push_back(canFoldToVWWithSEXT); 9436 break; 9437 case RISCVISD::VWADDU_W_VL: 9438 case RISCVISD::VWSUBU_W_VL: 9439 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu 9440 Strategies.push_back(canFoldToVWWithZEXT); 9441 break; 9442 default: 9443 llvm_unreachable("Unexpected opcode"); 9444 } 9445 return Strategies; 9446 } 9447 } // End anonymous namespace. 9448 9449 /// Combine a binary operation to its equivalent VW or VW_W form. 
/// The supported combines are:
/// add_vl -> vwadd(u) | vwadd(u)_w
/// sub_vl -> vwsub(u) | vwsub(u)_w
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
static SDValue
combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;

  assert(NodeExtensionHelper::isSupportedRoot(N) &&
         "Shouldn't have called this method");
  SmallVector<SDNode *> Worklist;
  SmallSet<SDNode *, 8> Inserted;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();
    if (!NodeExtensionHelper::isSupportedRoot(Root))
      return SDValue();

    NodeExtensionHelper LHS(N, 0, DAG);
    NodeExtensionHelper RHS(N, 1, DAG);
    auto AppendUsersIfNeeded = [&Worklist,
                                &Inserted](const NodeExtensionHelper &Op) {
      if (Op.needToPromoteOtherUsers()) {
        for (SDNode *TheUse : Op.OrigOperand->uses()) {
          if (Inserted.insert(TheUse).second)
            Worklist.push_back(TheUse);
        }
      }
    };

    // Control the compile time by limiting the total number of nodes we look
    // at.
    if (Inserted.size() > ExtensionMaxWebSize)
      return SDValue();

    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
        NodeExtensionHelper::getSupportedFoldings(N);

    assert(!FoldingStrategies.empty() && "Nothing to be folded");
    bool Matched = false;
    for (int Attempt = 0;
         (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
         ++Attempt) {

      for (NodeExtensionHelper::CombineToTry FoldingStrategy :
           FoldingStrategies) {
        std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
        if (Res) {
          Matched = true;
          CombinesToApply.push_back(*Res);
          // All the inputs that are extended need to be folded, otherwise we
          // would be leaving both the old input (since it may still be used)
          // and the new one.
          if (Res->SExtLHS.has_value())
            AppendUsersIfNeeded(LHS);
          if (Res->SExtRHS.has_value())
            AppendUsersIfNeeded(RHS);
          break;
        }
      }
      std::swap(LHS, RHS);
    }
    // Right now we do an all or nothing approach.
    if (!Matched)
      return SDValue();
  }
  // Store the value for the replacement of the input node separately.
  SDValue InputRootReplacement;
  // We do the RAUW after we materialize all the combines, because some
  // replaced nodes may be feeding some of the yet-to-be-replaced nodes. Put
  // differently, some of these nodes may appear in the NodeExtensionHelpers
  // of some of the yet-to-be-visited CombinesToApply roots.
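  // ValuesToReplace buffers the (old value, new value) pairs until every
  // CombineResult has been materialized.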
9527 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; 9528 ValuesToReplace.reserve(CombinesToApply.size()); 9529 for (CombineResult Res : CombinesToApply) { 9530 SDValue NewValue = Res.materialize(DAG); 9531 if (!InputRootReplacement) { 9532 assert(Res.Root == N && 9533 "First element is expected to be the current node"); 9534 InputRootReplacement = NewValue; 9535 } else { 9536 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue); 9537 } 9538 } 9539 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) { 9540 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second); 9541 DCI.AddToWorklist(OldNewValues.second.getNode()); 9542 } 9543 return InputRootReplacement; 9544 } 9545 9546 // Fold 9547 // (fp_to_int (froundeven X)) -> fcvt X, rne 9548 // (fp_to_int (ftrunc X)) -> fcvt X, rtz 9549 // (fp_to_int (ffloor X)) -> fcvt X, rdn 9550 // (fp_to_int (fceil X)) -> fcvt X, rup 9551 // (fp_to_int (fround X)) -> fcvt X, rmm 9552 static SDValue performFP_TO_INTCombine(SDNode *N, 9553 TargetLowering::DAGCombinerInfo &DCI, 9554 const RISCVSubtarget &Subtarget) { 9555 SelectionDAG &DAG = DCI.DAG; 9556 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9557 MVT XLenVT = Subtarget.getXLenVT(); 9558 9559 SDValue Src = N->getOperand(0); 9560 9561 // Ensure the FP type is legal. 9562 if (!TLI.isTypeLegal(Src.getValueType())) 9563 return SDValue(); 9564 9565 // Don't do this for f16 with Zfhmin and not Zfh. 9566 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 9567 return SDValue(); 9568 9569 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 9570 if (FRM == RISCVFPRndMode::Invalid) 9571 return SDValue(); 9572 9573 SDLoc DL(N); 9574 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; 9575 EVT VT = N->getValueType(0); 9576 9577 if (VT.isVector() && TLI.isTypeLegal(VT)) { 9578 MVT SrcVT = Src.getSimpleValueType(); 9579 MVT SrcContainerVT = SrcVT; 9580 MVT ContainerVT = VT.getSimpleVT(); 9581 SDValue XVal = Src.getOperand(0); 9582 9583 // For widening and narrowing conversions we just combine it into a 9584 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They 9585 // end up getting lowered to their appropriate pseudo instructions based on 9586 // their operand types 9587 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 || 9588 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits()) 9589 return SDValue(); 9590 9591 // Make fixed-length vectors scalable first 9592 if (SrcVT.isFixedLengthVector()) { 9593 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget); 9594 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget); 9595 ContainerVT = 9596 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget); 9597 } 9598 9599 auto [Mask, VL] = 9600 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget); 9601 9602 SDValue FpToInt; 9603 if (FRM == RISCVFPRndMode::RTZ) { 9604 // Use the dedicated trunc static rounding mode if we're truncating so we 9605 // don't need to generate calls to fsrmi/fsrm 9606 unsigned Opc = 9607 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL; 9608 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL); 9609 } else { 9610 unsigned Opc = 9611 IsSigned ? 
RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL; 9612 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, 9613 DAG.getTargetConstant(FRM, DL, XLenVT), VL); 9614 } 9615 9616 // If converted from fixed-length to scalable, convert back 9617 if (VT.isFixedLengthVector()) 9618 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget); 9619 9620 return FpToInt; 9621 } 9622 9623 // Only handle XLen or i32 types. Other types narrower than XLen will 9624 // eventually be legalized to XLenVT. 9625 if (VT != MVT::i32 && VT != XLenVT) 9626 return SDValue(); 9627 9628 unsigned Opc; 9629 if (VT == XLenVT) 9630 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 9631 else 9632 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 9633 9634 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0), 9635 DAG.getTargetConstant(FRM, DL, XLenVT)); 9636 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt); 9637 } 9638 9639 // Fold 9640 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne)) 9641 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz)) 9642 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn)) 9643 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup)) 9644 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm)) 9645 static SDValue performFP_TO_INT_SATCombine(SDNode *N, 9646 TargetLowering::DAGCombinerInfo &DCI, 9647 const RISCVSubtarget &Subtarget) { 9648 SelectionDAG &DAG = DCI.DAG; 9649 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9650 MVT XLenVT = Subtarget.getXLenVT(); 9651 9652 // Only handle XLen types. Other types narrower than XLen will eventually be 9653 // legalized to XLenVT. 9654 EVT DstVT = N->getValueType(0); 9655 if (DstVT != XLenVT) 9656 return SDValue(); 9657 9658 SDValue Src = N->getOperand(0); 9659 9660 // Ensure the FP type is also legal. 9661 if (!TLI.isTypeLegal(Src.getValueType())) 9662 return SDValue(); 9663 9664 // Don't do this for f16 with Zfhmin and not Zfh. 9665 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) 9666 return SDValue(); 9667 9668 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 9669 9670 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode()); 9671 if (FRM == RISCVFPRndMode::Invalid) 9672 return SDValue(); 9673 9674 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; 9675 9676 unsigned Opc; 9677 if (SatVT == DstVT) 9678 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU; 9679 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 9680 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; 9681 else 9682 return SDValue(); 9683 // FIXME: Support other SatVTs by clamping before or after the conversion. 9684 9685 Src = Src.getOperand(0); 9686 9687 SDLoc DL(N); 9688 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src, 9689 DAG.getTargetConstant(FRM, DL, XLenVT)); 9690 9691 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero 9692 // extend. 9693 if (Opc == RISCVISD::FCVT_WU_RV64) 9694 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32); 9695 9696 // RISCV FP-to-int conversions saturate to the destination register size, but 9697 // don't produce 0 for nan. 9698 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 9699 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); 9700 } 9701 9702 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is 9703 // smaller than XLenVT. 
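// Reversing the bytes and then reversing all the bits is equivalent to
// reversing the bits within each byte, which is exactly what brev8 does.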
9704 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, 9705 const RISCVSubtarget &Subtarget) { 9706 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension"); 9707 9708 SDValue Src = N->getOperand(0); 9709 if (Src.getOpcode() != ISD::BSWAP) 9710 return SDValue(); 9711 9712 EVT VT = N->getValueType(0); 9713 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() || 9714 !isPowerOf2_32(VT.getSizeInBits())) 9715 return SDValue(); 9716 9717 SDLoc DL(N); 9718 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0)); 9719 } 9720 9721 // Convert from one FMA opcode to another based on whether we are negating the 9722 // multiply result and/or the accumulator. 9723 // NOTE: Only supports RVV operations with VL. 9724 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { 9725 assert((NegMul || NegAcc) && "Not negating anything?"); 9726 9727 // Negating the multiply result changes ADD<->SUB and toggles 'N'. 9728 if (NegMul) { 9729 // clang-format off 9730 switch (Opcode) { 9731 default: llvm_unreachable("Unexpected opcode"); 9732 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 9733 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 9734 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 9735 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 9736 } 9737 // clang-format on 9738 } 9739 9740 // Negating the accumulator changes ADD<->SUB. 9741 if (NegAcc) { 9742 // clang-format off 9743 switch (Opcode) { 9744 default: llvm_unreachable("Unexpected opcode"); 9745 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break; 9746 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break; 9747 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break; 9748 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break; 9749 } 9750 // clang-format on 9751 } 9752 9753 return Opcode; 9754 } 9755 9756 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 9757 const RISCVSubtarget &Subtarget) { 9758 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode"); 9759 9760 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit()) 9761 return SDValue(); 9762 9763 if (!isa<ConstantSDNode>(N->getOperand(1))) 9764 return SDValue(); 9765 uint64_t ShAmt = N->getConstantOperandVal(1); 9766 if (ShAmt > 32) 9767 return SDValue(); 9768 9769 SDValue N0 = N->getOperand(0); 9770 9771 // Combine (sra (sext_inreg (shl X, C1), i32), C2) -> 9772 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of 9773 // SLLIW+SRAIW. SLLI+SRAI have compressed forms. 9774 if (ShAmt < 32 && 9775 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() && 9776 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 && 9777 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() && 9778 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { 9779 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1); 9780 if (LShAmt < 32) { 9781 SDLoc ShlDL(N0.getOperand(0)); 9782 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64, 9783 N0.getOperand(0).getOperand(0), 9784 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64)); 9785 SDLoc DL(N); 9786 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl, 9787 DAG.getConstant(ShAmt + 32, DL, MVT::i64)); 9788 } 9789 } 9790 9791 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C) 9792 // FIXME: Should this be a generic combine? There's a similar combine on X86. 9793 // 9794 // Also try these folds where an add or sub is in the middle. 
  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
  SDValue Shl;
  ConstantSDNode *AddC = nullptr;

  // We might have an ADD or SUB between the SRA and SHL.
  bool IsAdd = N0.getOpcode() == ISD::ADD;
  if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
    // Other operand needs to be a constant we can modify.
    AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
    if (!AddC)
      return SDValue();

    // AddC needs to have at least 32 trailing zeros.
    if (AddC->getAPIntValue().countTrailingZeros() < 32)
      return SDValue();

    // All users should be a shift by constant less than or equal to 32. This
    // ensures we'll do this optimization for each of them to produce an
    // add/sub+sext_inreg they can all share.
    for (SDNode *U : N0->uses()) {
      if (U->getOpcode() != ISD::SRA ||
          !isa<ConstantSDNode>(U->getOperand(1)) ||
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
        return SDValue();
    }

    Shl = N0.getOperand(IsAdd ? 0 : 1);
  } else {
    // Not an ADD or SUB.
    Shl = N0;
  }

  // Look for a shift left by 32.
  if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
      Shl.getConstantOperandVal(1) != 32)
    return SDValue();

  // If we didn't look through an add/sub, then the shl should have one use.
  // If we did look through an add/sub, the sext_inreg we create is free so
  // we're only creating 2 new instructions. It's enough to only remove the
  // original sra+add/sub.
  if (!AddC && !Shl.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  SDValue In = Shl.getOperand(0);

  // If we looked through an ADD or SUB, we need to rebuild it with the shifted
  // constant.
  if (AddC) {
    SDValue ShiftedAddC =
        DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
    if (IsAdd)
      In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
    else
      In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
  }

  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
                             DAG.getValueType(MVT::i32));
  if (ShAmt == 32)
    return SExt;

  return DAG.getNode(
      ISD::SHL, DL, MVT::i64, SExt,
      DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}

// Invert (and/or (setcc X, Y, cc), (xor Z, 1)) to (or/and (setcc X, Y, !cc), Z)
// if the result is used as the condition of a br_cc or select_cc that we can
// invert, inverting the setcc is free, and Z is 0/1. The caller will invert
// the br_cc/select_cc.
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
  bool IsAnd = Cond.getOpcode() == ISD::AND;
  if (!IsAnd && Cond.getOpcode() != ISD::OR)
    return SDValue();

  if (!Cond.hasOneUse())
    return SDValue();

  SDValue Setcc = Cond.getOperand(0);
  SDValue Xor = Cond.getOperand(1);
  // Canonicalize setcc to LHS.
  if (Setcc.getOpcode() != ISD::SETCC)
    std::swap(Setcc, Xor);
  // LHS should be a setcc and RHS should be an xor.
  if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
      Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
    return SDValue();

  // If the condition is an And, SimplifyDemandedBits may have changed
  // (xor Z, 1) to (not Z).
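  // i.e. (xor Z, -1), so for the And case an all-ones constant is accepted
  // below in addition to the constant 1.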
  SDValue Xor1 = Xor.getOperand(1);
  if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
    return SDValue();

  EVT VT = Cond.getValueType();
  SDValue Xor0 = Xor.getOperand(0);

  // The LHS of the xor needs to be 0/1.
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
  if (!DAG.MaskedValueIsZero(Xor0, Mask))
    return SDValue();

  // We can only invert integer setccs.
  EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
  if (!SetCCOpVT.isScalarInteger())
    return SDValue();

  ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  if (ISD::isIntEqualitySetCC(CCVal)) {
    CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
                         Setcc.getOperand(1), CCVal);
  } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
    // Invert (setlt 0, X) by converting to (setlt X, 1).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
                         DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
  } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
    // Invert (setlt X, 1) by converting to (setlt 0, X).
    Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
                         DAG.getConstant(0, SDLoc(Setcc), VT),
                         Setcc.getOperand(0), CCVal);
  } else
    return SDValue();

  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}

// Perform common combines for BR_CC and SELECT_CC conditions.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // Since an arithmetic right shift preserves the sign bit, the shift can be
  // omitted when comparing against zero.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
        LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) {
      LHS = LHS.getOperand(0);
      return true;
    }
  }

  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
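    // (seteq (setcc X, Y, cc), 0) holds exactly when (setcc X, Y, cc) is
    // false, i.e. when (setcc X, Y, !cc) is true.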
9952 bool Invert = CCVal == ISD::SETEQ; 9953 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 9954 if (Invert) 9955 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 9956 9957 RHS = LHS.getOperand(1); 9958 LHS = LHS.getOperand(0); 9959 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 9960 9961 CC = DAG.getCondCode(CCVal); 9962 return true; 9963 } 9964 9965 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) 9966 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) { 9967 RHS = LHS.getOperand(1); 9968 LHS = LHS.getOperand(0); 9969 return true; 9970 } 9971 9972 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) 9973 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && 9974 LHS.getOperand(1).getOpcode() == ISD::Constant) { 9975 SDValue LHS0 = LHS.getOperand(0); 9976 if (LHS0.getOpcode() == ISD::AND && 9977 LHS0.getOperand(1).getOpcode() == ISD::Constant) { 9978 uint64_t Mask = LHS0.getConstantOperandVal(1); 9979 uint64_t ShAmt = LHS.getConstantOperandVal(1); 9980 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) { 9981 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; 9982 CC = DAG.getCondCode(CCVal); 9983 9984 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; 9985 LHS = LHS0.getOperand(0); 9986 if (ShAmt != 0) 9987 LHS = 9988 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), 9989 DAG.getConstant(ShAmt, DL, LHS.getValueType())); 9990 return true; 9991 } 9992 } 9993 } 9994 9995 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1. 9996 // This can occur when legalizing some floating point comparisons. 9997 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 9998 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 9999 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 10000 CC = DAG.getCondCode(CCVal); 10001 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 10002 return true; 10003 } 10004 10005 if (isNullConstant(RHS)) { 10006 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) { 10007 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 10008 CC = DAG.getCondCode(CCVal); 10009 LHS = NewCond; 10010 return true; 10011 } 10012 } 10013 10014 return false; 10015 } 10016 10017 // Fold 10018 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)). 10019 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)). 10020 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)). 10021 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)). 
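// These hold because 0 is the identity for each of these operations (with 0
// on the RHS of the sub), so the untaken arm still produces Y.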
10022 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, 10023 SDValue TrueVal, SDValue FalseVal, 10024 bool Swapped) { 10025 bool Commutative = true; 10026 switch (TrueVal.getOpcode()) { 10027 default: 10028 return SDValue(); 10029 case ISD::SUB: 10030 Commutative = false; 10031 break; 10032 case ISD::ADD: 10033 case ISD::OR: 10034 case ISD::XOR: 10035 break; 10036 } 10037 10038 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) 10039 return SDValue(); 10040 10041 unsigned OpToFold; 10042 if (FalseVal == TrueVal.getOperand(0)) 10043 OpToFold = 0; 10044 else if (Commutative && FalseVal == TrueVal.getOperand(1)) 10045 OpToFold = 1; 10046 else 10047 return SDValue(); 10048 10049 EVT VT = N->getValueType(0); 10050 SDLoc DL(N); 10051 SDValue Zero = DAG.getConstant(0, DL, VT); 10052 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold); 10053 10054 if (Swapped) 10055 std::swap(OtherOp, Zero); 10056 SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero); 10057 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel); 10058 } 10059 10060 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, 10061 const RISCVSubtarget &Subtarget) { 10062 if (Subtarget.hasShortForwardBranchOpt()) 10063 return SDValue(); 10064 10065 // Only support XLenVT. 10066 if (N->getValueType(0) != Subtarget.getXLenVT()) 10067 return SDValue(); 10068 10069 SDValue TrueVal = N->getOperand(1); 10070 SDValue FalseVal = N->getOperand(2); 10071 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false)) 10072 return V; 10073 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true); 10074 } 10075 10076 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 10077 DAGCombinerInfo &DCI) const { 10078 SelectionDAG &DAG = DCI.DAG; 10079 10080 // Helper to call SimplifyDemandedBits on an operand of N where only some low 10081 // bits are demanded. N will be added to the Worklist if it was not deleted. 10082 // Caller should return SDValue(N, 0) if this returns true. 10083 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { 10084 SDValue Op = N->getOperand(OpNo); 10085 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits); 10086 if (!SimplifyDemandedBits(Op, Mask, DCI)) 10087 return false; 10088 10089 if (N->getOpcode() != ISD::DELETED_NODE) 10090 DCI.AddToWorklist(N); 10091 return true; 10092 }; 10093 10094 switch (N->getOpcode()) { 10095 default: 10096 break; 10097 case RISCVISD::SplitF64: { 10098 SDValue Op0 = N->getOperand(0); 10099 // If the input to SplitF64 is just BuildPairF64 then the operation is 10100 // redundant. Instead, use BuildPairF64's operands directly. 10101 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 10102 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 10103 10104 if (Op0->isUndef()) { 10105 SDValue Lo = DAG.getUNDEF(MVT::i32); 10106 SDValue Hi = DAG.getUNDEF(MVT::i32); 10107 return DCI.CombineTo(N, Lo, Hi); 10108 } 10109 10110 SDLoc DL(N); 10111 10112 // It's cheaper to materialise two 32-bit integers than to load a double 10113 // from the constant pool and transfer it to integer registers through the 10114 // stack. 
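    // e.g. the f64 constant 1.0 (0x3FF0000000000000) splits into
    // Lo = 0x00000000 and Hi = 0x3FF00000.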
10115 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 10116 APInt V = C->getValueAPF().bitcastToAPInt(); 10117 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 10118 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 10119 return DCI.CombineTo(N, Lo, Hi); 10120 } 10121 10122 // This is a target-specific version of a DAGCombine performed in 10123 // DAGCombiner::visitBITCAST. It performs the equivalent of: 10124 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 10125 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 10126 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 10127 !Op0.getNode()->hasOneUse()) 10128 break; 10129 SDValue NewSplitF64 = 10130 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 10131 Op0.getOperand(0)); 10132 SDValue Lo = NewSplitF64.getValue(0); 10133 SDValue Hi = NewSplitF64.getValue(1); 10134 APInt SignBit = APInt::getSignMask(32); 10135 if (Op0.getOpcode() == ISD::FNEG) { 10136 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 10137 DAG.getConstant(SignBit, DL, MVT::i32)); 10138 return DCI.CombineTo(N, Lo, NewHi); 10139 } 10140 assert(Op0.getOpcode() == ISD::FABS); 10141 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 10142 DAG.getConstant(~SignBit, DL, MVT::i32)); 10143 return DCI.CombineTo(N, Lo, NewHi); 10144 } 10145 case RISCVISD::SLLW: 10146 case RISCVISD::SRAW: 10147 case RISCVISD::SRLW: 10148 case RISCVISD::RORW: 10149 case RISCVISD::ROLW: { 10150 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 10151 if (SimplifyDemandedLowBitsHelper(0, 32) || 10152 SimplifyDemandedLowBitsHelper(1, 5)) 10153 return SDValue(N, 0); 10154 10155 break; 10156 } 10157 case RISCVISD::CLZW: 10158 case RISCVISD::CTZW: { 10159 // Only the lower 32 bits of the first operand are read 10160 if (SimplifyDemandedLowBitsHelper(0, 32)) 10161 return SDValue(N, 0); 10162 break; 10163 } 10164 case RISCVISD::FMV_X_ANYEXTH: 10165 case RISCVISD::FMV_X_ANYEXTW_RV64: { 10166 SDLoc DL(N); 10167 SDValue Op0 = N->getOperand(0); 10168 MVT VT = N->getSimpleValueType(0); 10169 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 10170 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 10171 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. 10172 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && 10173 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || 10174 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && 10175 Op0->getOpcode() == RISCVISD::FMV_H_X)) { 10176 assert(Op0.getOperand(0).getValueType() == VT && 10177 "Unexpected value type!"); 10178 return Op0.getOperand(0); 10179 } 10180 10181 // This is a target-specific version of a DAGCombine performed in 10182 // DAGCombiner::visitBITCAST. It performs the equivalent of: 10183 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 10184 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 10185 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 10186 !Op0.getNode()->hasOneUse()) 10187 break; 10188 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); 10189 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 
32 : 16; 10190 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits()); 10191 if (Op0.getOpcode() == ISD::FNEG) 10192 return DAG.getNode(ISD::XOR, DL, VT, NewFMV, 10193 DAG.getConstant(SignBit, DL, VT)); 10194 10195 assert(Op0.getOpcode() == ISD::FABS); 10196 return DAG.getNode(ISD::AND, DL, VT, NewFMV, 10197 DAG.getConstant(~SignBit, DL, VT)); 10198 } 10199 case ISD::ADD: 10200 return performADDCombine(N, DAG, Subtarget); 10201 case ISD::SUB: 10202 return performSUBCombine(N, DAG, Subtarget); 10203 case ISD::AND: 10204 return performANDCombine(N, DCI, Subtarget); 10205 case ISD::OR: 10206 return performORCombine(N, DCI, Subtarget); 10207 case ISD::XOR: 10208 return performXORCombine(N, DAG, Subtarget); 10209 case ISD::FADD: 10210 case ISD::UMAX: 10211 case ISD::UMIN: 10212 case ISD::SMAX: 10213 case ISD::SMIN: 10214 case ISD::FMAXNUM: 10215 case ISD::FMINNUM: 10216 return combineBinOpToReduce(N, DAG, Subtarget); 10217 case ISD::SETCC: 10218 return performSETCCCombine(N, DAG, Subtarget); 10219 case ISD::SIGN_EXTEND_INREG: 10220 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); 10221 case ISD::ZERO_EXTEND: 10222 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 10223 // type legalization. This is safe because fp_to_uint produces poison if 10224 // it overflows. 10225 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { 10226 SDValue Src = N->getOperand(0); 10227 if (Src.getOpcode() == ISD::FP_TO_UINT && 10228 isTypeLegal(Src.getOperand(0).getValueType())) 10229 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, 10230 Src.getOperand(0)); 10231 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && 10232 isTypeLegal(Src.getOperand(1).getValueType())) { 10233 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); 10234 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, 10235 Src.getOperand(0), Src.getOperand(1)); 10236 DCI.CombineTo(N, Res); 10237 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); 10238 DCI.recursivelyDeleteUnusedNodes(Src.getNode()); 10239 return SDValue(N, 0); // Return N so it doesn't get rechecked. 10240 } 10241 } 10242 return SDValue(); 10243 case ISD::TRUNCATE: 10244 return performTRUNCATECombine(N, DAG, Subtarget); 10245 case ISD::SELECT: 10246 return performSELECTCombine(N, DAG, Subtarget); 10247 case RISCVISD::SELECT_CC: { 10248 // Transform 10249 SDValue LHS = N->getOperand(0); 10250 SDValue RHS = N->getOperand(1); 10251 SDValue CC = N->getOperand(2); 10252 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); 10253 SDValue TrueV = N->getOperand(3); 10254 SDValue FalseV = N->getOperand(4); 10255 SDLoc DL(N); 10256 EVT VT = N->getValueType(0); 10257 10258 // If the True and False values are the same, we don't need a select_cc. 10259 if (TrueV == FalseV) 10260 return TrueV; 10261 10262 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z 10263 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y 10264 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) && 10265 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) && 10266 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) { 10267 if (CCVal == ISD::CondCode::SETGE) 10268 std::swap(TrueV, FalseV); 10269 10270 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue(); 10271 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue(); 10272 // Only handle simm12, if it is not in this range, it can be considered as 10273 // register. 
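      // Both constants and their difference must fit in a signed 12-bit
      // immediate so the masked difference and the addend can be encoded as
      // immediates.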
10274 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) && 10275 isInt<12>(TrueSImm - FalseSImm)) { 10276 SDValue SRA = 10277 DAG.getNode(ISD::SRA, DL, VT, LHS, 10278 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT)); 10279 SDValue AND = 10280 DAG.getNode(ISD::AND, DL, VT, SRA, 10281 DAG.getConstant(TrueSImm - FalseSImm, DL, VT)); 10282 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV); 10283 } 10284 10285 if (CCVal == ISD::CondCode::SETGE) 10286 std::swap(TrueV, FalseV); 10287 } 10288 10289 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 10290 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 10291 {LHS, RHS, CC, TrueV, FalseV}); 10292 10293 if (!Subtarget.hasShortForwardBranchOpt()) { 10294 // (select c, -1, y) -> -c | y 10295 if (isAllOnesConstant(TrueV)) { 10296 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 10297 SDValue Neg = DAG.getNegative(C, DL, VT); 10298 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV); 10299 } 10300 // (select c, y, -1) -> -!c | y 10301 if (isAllOnesConstant(FalseV)) { 10302 SDValue C = 10303 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 10304 SDValue Neg = DAG.getNegative(C, DL, VT); 10305 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV); 10306 } 10307 10308 // (select c, 0, y) -> -!c & y 10309 if (isNullConstant(TrueV)) { 10310 SDValue C = 10311 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT)); 10312 SDValue Neg = DAG.getNegative(C, DL, VT); 10313 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV); 10314 } 10315 // (select c, y, 0) -> -c & y 10316 if (isNullConstant(FalseV)) { 10317 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); 10318 SDValue Neg = DAG.getNegative(C, DL, VT); 10319 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV); 10320 } 10321 } 10322 10323 return SDValue(); 10324 } 10325 case RISCVISD::BR_CC: { 10326 SDValue LHS = N->getOperand(1); 10327 SDValue RHS = N->getOperand(2); 10328 SDValue CC = N->getOperand(3); 10329 SDLoc DL(N); 10330 10331 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) 10332 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 10333 N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); 10334 10335 return SDValue(); 10336 } 10337 case ISD::BITREVERSE: 10338 return performBITREVERSECombine(N, DAG, Subtarget); 10339 case ISD::FP_TO_SINT: 10340 case ISD::FP_TO_UINT: 10341 return performFP_TO_INTCombine(N, DCI, Subtarget); 10342 case ISD::FP_TO_SINT_SAT: 10343 case ISD::FP_TO_UINT_SAT: 10344 return performFP_TO_INT_SATCombine(N, DCI, Subtarget); 10345 case ISD::FCOPYSIGN: { 10346 EVT VT = N->getValueType(0); 10347 if (!VT.isVector()) 10348 break; 10349 // There is a form of VFSGNJ which injects the negated sign of its second 10350 // operand. Try and bubble any FNEG up after the extend/round to produce 10351 // this optimized pattern. Avoid modifying cases where FP_ROUND and 10352 // TRUNC=1. 10353 SDValue In2 = N->getOperand(1); 10354 // Avoid cases where the extend/round has multiple uses, as duplicating 10355 // those is typically more expensive than removing a fneg. 
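    // As a sketch of the intended rewrite (assuming the checks below pass):
    //   (fcopysign X, (fp_extend (fneg Y))) -> (fcopysign X, (fneg (fp_extend Y)))
    // so the sign-injecting negate can later be selected as vfsgnjn.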
10356 if (!In2.hasOneUse()) 10357 break; 10358 if (In2.getOpcode() != ISD::FP_EXTEND && 10359 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 10360 break; 10361 In2 = In2.getOperand(0); 10362 if (In2.getOpcode() != ISD::FNEG) 10363 break; 10364 SDLoc DL(N); 10365 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 10366 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 10367 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 10368 } 10369 case ISD::MGATHER: 10370 case ISD::MSCATTER: 10371 case ISD::VP_GATHER: 10372 case ISD::VP_SCATTER: { 10373 if (!DCI.isBeforeLegalize()) 10374 break; 10375 SDValue Index, ScaleOp; 10376 bool IsIndexSigned = false; 10377 if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) { 10378 Index = VPGSN->getIndex(); 10379 ScaleOp = VPGSN->getScale(); 10380 IsIndexSigned = VPGSN->isIndexSigned(); 10381 assert(!VPGSN->isIndexScaled() && 10382 "Scaled gather/scatter should not be formed"); 10383 } else { 10384 const auto *MGSN = cast<MaskedGatherScatterSDNode>(N); 10385 Index = MGSN->getIndex(); 10386 ScaleOp = MGSN->getScale(); 10387 IsIndexSigned = MGSN->isIndexSigned(); 10388 assert(!MGSN->isIndexScaled() && 10389 "Scaled gather/scatter should not be formed"); 10390 10391 } 10392 EVT IndexVT = Index.getValueType(); 10393 MVT XLenVT = Subtarget.getXLenVT(); 10394 // RISCV indexed loads only support the "unsigned unscaled" addressing 10395 // mode, so anything else must be manually legalized. 10396 bool NeedsIdxLegalization = 10397 (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT)); 10398 if (!NeedsIdxLegalization) 10399 break; 10400 10401 SDLoc DL(N); 10402 10403 // Any index legalization should first promote to XLenVT, so we don't lose 10404 // bits when scaling. This may create an illegal index type so we let 10405 // LLVM's legalization take care of the splitting. 10406 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 10407 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 10408 IndexVT = IndexVT.changeVectorElementType(XLenVT); 10409 Index = DAG.getNode(IsIndexSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, 10410 DL, IndexVT, Index); 10411 } 10412 10413 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED; 10414 if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N)) 10415 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 10416 {VPGN->getChain(), VPGN->getBasePtr(), Index, 10417 ScaleOp, VPGN->getMask(), 10418 VPGN->getVectorLength()}, 10419 VPGN->getMemOperand(), NewIndexTy); 10420 if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N)) 10421 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 10422 {VPSN->getChain(), VPSN->getValue(), 10423 VPSN->getBasePtr(), Index, ScaleOp, 10424 VPSN->getMask(), VPSN->getVectorLength()}, 10425 VPSN->getMemOperand(), NewIndexTy); 10426 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) 10427 return DAG.getMaskedGather( 10428 N->getVTList(), MGN->getMemoryVT(), DL, 10429 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 10430 MGN->getBasePtr(), Index, ScaleOp}, 10431 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); 10432 const auto *MSN = cast<MaskedScatterSDNode>(N); 10433 return DAG.getMaskedScatter( 10434 N->getVTList(), MSN->getMemoryVT(), DL, 10435 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 10436 Index, ScaleOp}, 10437 MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); 10438 } 10439 case RISCVISD::SRA_VL: 10440 case RISCVISD::SRL_VL: 10441 case RISCVISD::SHL_VL: { 10442 SDValue ShAmt = N->getOperand(1); 10443 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 10444 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 10445 SDLoc DL(N); 10446 SDValue VL = N->getOperand(3); 10447 EVT VT = N->getValueType(0); 10448 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 10449 ShAmt.getOperand(1), VL); 10450 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, 10451 N->getOperand(2), N->getOperand(3), N->getOperand(4)); 10452 } 10453 break; 10454 } 10455 case ISD::SRA: 10456 if (SDValue V = performSRACombine(N, DAG, Subtarget)) 10457 return V; 10458 [[fallthrough]]; 10459 case ISD::SRL: 10460 case ISD::SHL: { 10461 SDValue ShAmt = N->getOperand(1); 10462 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 10463 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 10464 SDLoc DL(N); 10465 EVT VT = N->getValueType(0); 10466 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT), 10467 ShAmt.getOperand(1), 10468 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT())); 10469 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); 10470 } 10471 break; 10472 } 10473 case RISCVISD::ADD_VL: 10474 case RISCVISD::SUB_VL: 10475 case RISCVISD::VWADD_W_VL: 10476 case RISCVISD::VWADDU_W_VL: 10477 case RISCVISD::VWSUB_W_VL: 10478 case RISCVISD::VWSUBU_W_VL: 10479 case RISCVISD::MUL_VL: 10480 return combineBinOp_VLToVWBinOp_VL(N, DCI); 10481 case RISCVISD::VFMADD_VL: 10482 case RISCVISD::VFNMADD_VL: 10483 case RISCVISD::VFMSUB_VL: 10484 case RISCVISD::VFNMSUB_VL: { 10485 // Fold FNEG_VL into FMA opcodes. 10486 SDValue A = N->getOperand(0); 10487 SDValue B = N->getOperand(1); 10488 SDValue C = N->getOperand(2); 10489 SDValue Mask = N->getOperand(3); 10490 SDValue VL = N->getOperand(4); 10491 10492 auto invertIfNegative = [&Mask, &VL](SDValue &V) { 10493 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask && 10494 V.getOperand(2) == VL) { 10495 // Return the negated input. 
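      // (The mask and VL were compared above so that the stripped FNEG_VL
      // negates exactly the same lanes, under the same predication, as the
      // fused operation; with a mismatch the fold would be unsound.)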
10496 V = V.getOperand(0); 10497 return true; 10498 } 10499 10500 return false; 10501 }; 10502 10503 bool NegA = invertIfNegative(A); 10504 bool NegB = invertIfNegative(B); 10505 bool NegC = invertIfNegative(C); 10506 10507 // If no operands are negated, we're done. 10508 if (!NegA && !NegB && !NegC) 10509 return SDValue(); 10510 10511 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); 10512 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask, 10513 VL); 10514 } 10515 case ISD::STORE: { 10516 auto *Store = cast<StoreSDNode>(N); 10517 SDValue Val = Store->getValue(); 10518 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. 10519 // vfmv.f.s is represented as extract element from 0. Match it late to avoid 10520 // any illegal types. 10521 if (Val.getOpcode() == RISCVISD::VMV_X_S || 10522 (DCI.isAfterLegalizeDAG() && 10523 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 10524 isNullConstant(Val.getOperand(1)))) { 10525 SDValue Src = Val.getOperand(0); 10526 MVT VecVT = Src.getSimpleValueType(); 10527 EVT MemVT = Store->getMemoryVT(); 10528 // VecVT should be scalable and memory VT should match the element type. 10529 if (VecVT.isScalableVector() && 10530 MemVT == VecVT.getVectorElementType()) { 10531 SDLoc DL(N); 10532 MVT MaskVT = getMaskTypeFor(VecVT); 10533 return DAG.getStoreVP( 10534 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(), 10535 DAG.getConstant(1, DL, MaskVT), 10536 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT, 10537 Store->getMemOperand(), Store->getAddressingMode(), 10538 Store->isTruncatingStore(), /*IsCompress*/ false); 10539 } 10540 } 10541 10542 break; 10543 } 10544 case ISD::SPLAT_VECTOR: { 10545 EVT VT = N->getValueType(0); 10546 // Only perform this combine on legal MVT types. 10547 if (!isTypeLegal(VT)) 10548 break; 10549 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, 10550 DAG, Subtarget)) 10551 return Gather; 10552 break; 10553 } 10554 case RISCVISD::VMV_V_X_VL: { 10555 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the 10556 // scalar input. 10557 unsigned ScalarSize = N->getOperand(1).getValueSizeInBits(); 10558 unsigned EltWidth = N->getValueType(0).getScalarSizeInBits(); 10559 if (ScalarSize > EltWidth && N->getOperand(0).isUndef()) 10560 if (SimplifyDemandedLowBitsHelper(1, EltWidth)) 10561 return SDValue(N, 0); 10562 10563 break; 10564 } 10565 case RISCVISD::VFMV_S_F_VL: { 10566 SDValue Src = N->getOperand(1); 10567 // Try to remove vector->scalar->vector if the scalar->vector is inserting 10568 // into an undef vector. 10569 // TODO: Could use a vslide or vmv.v.v for non-undef. 10570 if (N->getOperand(0).isUndef() && 10571 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 10572 isNullConstant(Src.getOperand(1)) && 10573 Src.getOperand(0).getValueType().isScalableVector()) { 10574 EVT VT = N->getValueType(0); 10575 EVT SrcVT = Src.getOperand(0).getValueType(); 10576 assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); 10577 // Widths match, just return the original vector. 10578 if (SrcVT == VT) 10579 return Src.getOperand(0); 10580 // TODO: Use insert_subvector/extract_subvector to change widen/narrow? 10581 } 10582 break; 10583 } 10584 case ISD::INTRINSIC_WO_CHAIN: { 10585 unsigned IntNo = N->getConstantOperandVal(0); 10586 switch (IntNo) { 10587 // By default we do not combine any intrinsic. 
10588 default: 10589 return SDValue(); 10590 case Intrinsic::riscv_vcpop: 10591 case Intrinsic::riscv_vcpop_mask: 10592 case Intrinsic::riscv_vfirst: 10593 case Intrinsic::riscv_vfirst_mask: { 10594 SDValue VL = N->getOperand(2); 10595 if (IntNo == Intrinsic::riscv_vcpop_mask || 10596 IntNo == Intrinsic::riscv_vfirst_mask) 10597 VL = N->getOperand(3); 10598 if (!isNullConstant(VL)) 10599 return SDValue(); 10600 // If VL is 0, vcpop -> li 0, vfirst -> li -1. 10601 SDLoc DL(N); 10602 EVT VT = N->getValueType(0); 10603 if (IntNo == Intrinsic::riscv_vfirst || 10604 IntNo == Intrinsic::riscv_vfirst_mask) 10605 return DAG.getConstant(-1, DL, VT); 10606 return DAG.getConstant(0, DL, VT); 10607 } 10608 } 10609 } 10610 case ISD::BITCAST: { 10611 assert(Subtarget.useRVVForFixedLengthVectors()); 10612 SDValue N0 = N->getOperand(0); 10613 EVT VT = N->getValueType(0); 10614 EVT SrcVT = N0.getValueType(); 10615 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer 10616 // type, widen both sides to avoid a trip through memory. 10617 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) && 10618 VT.isScalarInteger()) { 10619 unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); 10620 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); 10621 Ops[0] = N0; 10622 SDLoc DL(N); 10623 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops); 10624 N0 = DAG.getBitcast(MVT::i8, N0); 10625 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0); 10626 } 10627 10628 return SDValue(); 10629 } 10630 } 10631 10632 return SDValue(); 10633 } 10634 10635 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 10636 const SDNode *N, CombineLevel Level) const { 10637 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || 10638 N->getOpcode() == ISD::SRL) && 10639 "Expected shift op"); 10640 10641 // The following folds are only desirable if `(OP _, c1 << c2)` can be 10642 // materialised in fewer instructions than `(OP _, c1)`: 10643 // 10644 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 10645 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 10646 SDValue N0 = N->getOperand(0); 10647 EVT Ty = N0.getValueType(); 10648 if (Ty.isScalarInteger() && 10649 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 10650 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 10651 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 10652 if (C1 && C2) { 10653 const APInt &C1Int = C1->getAPIntValue(); 10654 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 10655 10656 // We can materialise `c1 << c2` into an add immediate, so it's "free", 10657 // and the combine should happen, to potentially allow further combines 10658 // later. 10659 if (ShiftedC1Int.getMinSignedBits() <= 64 && 10660 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 10661 return true; 10662 10663 // We can materialise `c1` in an add immediate, so it's "free", and the 10664 // combine should be prevented. 10665 if (C1Int.getMinSignedBits() <= 64 && 10666 isLegalAddImmediate(C1Int.getSExtValue())) 10667 return false; 10668 10669 // Neither constant will fit into an immediate, so find materialisation 10670 // costs. 
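    // e.g. for (shl (add x, 4097), 2), both 4097 and 4097 << 2 == 16388 take
    // two instructions (LUI plus an add of the low bits) to materialise, so
    // the costs tie and the combine is allowed to proceed.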
10671 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 10672 Subtarget.getFeatureBits(), 10673 /*CompressionCost*/true); 10674 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 10675 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(), 10676 /*CompressionCost*/true); 10677 10678 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 10679 // combine should be prevented. 10680 if (C1Cost < ShiftedC1Cost) 10681 return false; 10682 } 10683 } 10684 return true; 10685 } 10686 10687 bool RISCVTargetLowering::targetShrinkDemandedConstant( 10688 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 10689 TargetLoweringOpt &TLO) const { 10690 // Delay this optimization as late as possible. 10691 if (!TLO.LegalOps) 10692 return false; 10693 10694 EVT VT = Op.getValueType(); 10695 if (VT.isVector()) 10696 return false; 10697 10698 unsigned Opcode = Op.getOpcode(); 10699 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) 10700 return false; 10701 10702 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 10703 if (!C) 10704 return false; 10705 10706 const APInt &Mask = C->getAPIntValue(); 10707 10708 // Clear all non-demanded bits initially. 10709 APInt ShrunkMask = Mask & DemandedBits; 10710 10711 // Try to make a smaller immediate by setting undemanded bits. 10712 10713 APInt ExpandedMask = Mask | ~DemandedBits; 10714 10715 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { 10716 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); 10717 }; 10718 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { 10719 if (NewMask == Mask) 10720 return true; 10721 SDLoc DL(Op); 10722 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); 10723 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), 10724 Op.getOperand(0), NewC); 10725 return TLO.CombineTo(Op, NewOp); 10726 }; 10727 10728 // If the shrunk mask fits in sign extended 12 bits, let the target 10729 // independent code apply it. 10730 if (ShrunkMask.isSignedIntN(12)) 10731 return false; 10732 10733 // And has a few special cases for zext. 10734 if (Opcode == ISD::AND) { 10735 // Preserve (and X, 0xffff), if zext.h exists use zext.h, 10736 // otherwise use SLLI + SRLI. 10737 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); 10738 if (IsLegalMask(NewMask)) 10739 return UseMask(NewMask); 10740 10741 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 10742 if (VT == MVT::i64) { 10743 APInt NewMask = APInt(64, 0xffffffff); 10744 if (IsLegalMask(NewMask)) 10745 return UseMask(NewMask); 10746 } 10747 } 10748 10749 // For the remaining optimizations, we need to be able to make a negative 10750 // number through a combination of mask and undemanded bits. 10751 if (!ExpandedMask.isNegative()) 10752 return false; 10753 10754 // What is the fewest number of bits we need to represent the negative number. 10755 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 10756 10757 // Try to make a 12 bit negative immediate. If that fails try to make a 32 10758 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 10759 // If we can't create a simm12, we shouldn't change opaque constants. 
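  // Worked example (RV64, illustrative): for (and X, 0xFFFFFFF0) where only
  // the low 32 bits are demanded, ShrunkMask is 0xFFFFFFF0 and ExpandedMask is
  // 0xFFFFFFFFFFFFFFF0, so MinSignedBits is 5 and the mask can be widened to
  // -16, replacing a multi-instruction constant with a single ANDI.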
10760 APInt NewMask = ShrunkMask; 10761 if (MinSignedBits <= 12) 10762 NewMask.setBitsFrom(11); 10763 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 10764 NewMask.setBitsFrom(31); 10765 else 10766 return false; 10767 10768 // Check that our new mask is a subset of the demanded mask. 10769 assert(IsLegalMask(NewMask)); 10770 return UseMask(NewMask); 10771 } 10772 10773 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) { 10774 static const uint64_t GREVMasks[] = { 10775 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 10776 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 10777 10778 for (unsigned Stage = 0; Stage != 6; ++Stage) { 10779 unsigned Shift = 1 << Stage; 10780 if (ShAmt & Shift) { 10781 uint64_t Mask = GREVMasks[Stage]; 10782 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask); 10783 if (IsGORC) 10784 Res |= x; 10785 x = Res; 10786 } 10787 } 10788 10789 return x; 10790 } 10791 10792 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 10793 KnownBits &Known, 10794 const APInt &DemandedElts, 10795 const SelectionDAG &DAG, 10796 unsigned Depth) const { 10797 unsigned BitWidth = Known.getBitWidth(); 10798 unsigned Opc = Op.getOpcode(); 10799 assert((Opc >= ISD::BUILTIN_OP_END || 10800 Opc == ISD::INTRINSIC_WO_CHAIN || 10801 Opc == ISD::INTRINSIC_W_CHAIN || 10802 Opc == ISD::INTRINSIC_VOID) && 10803 "Should use MaskedValueIsZero if you don't know whether Op" 10804 " is a target node!"); 10805 10806 Known.resetAll(); 10807 switch (Opc) { 10808 default: break; 10809 case RISCVISD::SELECT_CC: { 10810 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 10811 // If we don't know any bits, early out. 10812 if (Known.isUnknown()) 10813 break; 10814 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 10815 10816 // Only known if known in both the LHS and RHS. 10817 Known = KnownBits::commonBits(Known, Known2); 10818 break; 10819 } 10820 case RISCVISD::REMUW: { 10821 KnownBits Known2; 10822 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 10823 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 10824 // We only care about the lower 32 bits. 10825 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 10826 // Restore the original width by sign extending. 10827 Known = Known.sext(BitWidth); 10828 break; 10829 } 10830 case RISCVISD::DIVUW: { 10831 KnownBits Known2; 10832 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 10833 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 10834 // We only care about the lower 32 bits. 10835 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 10836 // Restore the original width by sign extending. 
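    // (DIVUW, like the other *W instructions, sign-extends its 32-bit result
    // to XLEN, so sign-extending the known bits computed at 32 bits restores
    // the full width correctly.)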
10837 Known = Known.sext(BitWidth); 10838 break; 10839 } 10840 case RISCVISD::CTZW: { 10841 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 10842 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); 10843 unsigned LowBits = llvm::bit_width(PossibleTZ); 10844 Known.Zero.setBitsFrom(LowBits); 10845 break; 10846 } 10847 case RISCVISD::CLZW: { 10848 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 10849 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); 10850 unsigned LowBits = llvm::bit_width(PossibleLZ); 10851 Known.Zero.setBitsFrom(LowBits); 10852 break; 10853 } 10854 case RISCVISD::BREV8: 10855 case RISCVISD::ORC_B: { 10856 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a 10857 // control value of 7 is equivalent to brev8 and orc.b. 10858 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 10859 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B; 10860 // To compute zeros, we need to invert the value and invert it back after. 10861 Known.Zero = 10862 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC); 10863 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC); 10864 break; 10865 } 10866 case RISCVISD::READ_VLENB: { 10867 // We can use the minimum and maximum VLEN values to bound VLENB. We 10868 // know VLEN must be a power of two. 10869 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8; 10870 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8; 10871 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?"); 10872 Known.Zero.setLowBits(Log2_32(MinVLenB)); 10873 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1); 10874 if (MaxVLenB == MinVLenB) 10875 Known.One.setBit(Log2_32(MinVLenB)); 10876 break; 10877 } 10878 case ISD::INTRINSIC_W_CHAIN: 10879 case ISD::INTRINSIC_WO_CHAIN: { 10880 unsigned IntNo = 10881 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1); 10882 switch (IntNo) { 10883 default: 10884 // We can't do anything for most intrinsics. 10885 break; 10886 case Intrinsic::riscv_vsetvli: 10887 case Intrinsic::riscv_vsetvlimax: 10888 case Intrinsic::riscv_vsetvli_opt: 10889 case Intrinsic::riscv_vsetvlimax_opt: 10890 // Assume that VL output is positive and would fit in an int32_t. 10891 // TODO: VLEN might be capped at 16 bits in a future V spec update. 10892 if (BitWidth >= 32) 10893 Known.Zero.setBitsFrom(31); 10894 break; 10895 } 10896 break; 10897 } 10898 } 10899 } 10900 10901 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 10902 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 10903 unsigned Depth) const { 10904 switch (Op.getOpcode()) { 10905 default: 10906 break; 10907 case RISCVISD::SELECT_CC: { 10908 unsigned Tmp = 10909 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); 10910 if (Tmp == 1) return 1; // Early out. 10911 unsigned Tmp2 = 10912 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); 10913 return std::min(Tmp, Tmp2); 10914 } 10915 case RISCVISD::ABSW: { 10916 // We expand this at isel to negw+max. The result will have 33 sign bits 10917 // if the input has at least 33 sign bits. 
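    // For example, an input that is already sign-extended from i32 (33 sign
    // bits) stays sign-extended from i32 through both negw and max, even for
    // INT32_MIN, so reporting 33 sign bits is safe in that case.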
10918 unsigned Tmp = 10919 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 10920 if (Tmp < 33) return 1; 10921 return 33; 10922 } 10923 case RISCVISD::SLLW: 10924 case RISCVISD::SRAW: 10925 case RISCVISD::SRLW: 10926 case RISCVISD::DIVW: 10927 case RISCVISD::DIVUW: 10928 case RISCVISD::REMUW: 10929 case RISCVISD::ROLW: 10930 case RISCVISD::RORW: 10931 case RISCVISD::FCVT_W_RV64: 10932 case RISCVISD::FCVT_WU_RV64: 10933 case RISCVISD::STRICT_FCVT_W_RV64: 10934 case RISCVISD::STRICT_FCVT_WU_RV64: 10935 // TODO: As the result is sign-extended, this is conservatively correct. A 10936 // more precise answer could be calculated for SRAW depending on known 10937 // bits in the shift amount. 10938 return 33; 10939 case RISCVISD::VMV_X_S: { 10940 // The number of sign bits of the scalar result is computed by obtaining the 10941 // element type of the input vector operand, subtracting its width from the 10942 // XLEN, and then adding one (sign bit within the element type). If the 10943 // element type is wider than XLen, the least-significant XLEN bits are 10944 // taken. 10945 unsigned XLen = Subtarget.getXLen(); 10946 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits(); 10947 if (EltBits <= XLen) 10948 return XLen - EltBits + 1; 10949 break; 10950 } 10951 case ISD::INTRINSIC_W_CHAIN: { 10952 unsigned IntNo = Op.getConstantOperandVal(1); 10953 switch (IntNo) { 10954 default: 10955 break; 10956 case Intrinsic::riscv_masked_atomicrmw_xchg_i64: 10957 case Intrinsic::riscv_masked_atomicrmw_add_i64: 10958 case Intrinsic::riscv_masked_atomicrmw_sub_i64: 10959 case Intrinsic::riscv_masked_atomicrmw_nand_i64: 10960 case Intrinsic::riscv_masked_atomicrmw_max_i64: 10961 case Intrinsic::riscv_masked_atomicrmw_min_i64: 10962 case Intrinsic::riscv_masked_atomicrmw_umax_i64: 10963 case Intrinsic::riscv_masked_atomicrmw_umin_i64: 10964 case Intrinsic::riscv_masked_cmpxchg_i64: 10965 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated 10966 // narrow atomic operation. These are implemented using atomic 10967 // operations at the minimum supported atomicrmw/cmpxchg width whose 10968 // result is then sign extended to XLEN. With +A, the minimum width is 10969 // 32 for both 64 and 32. 10970 assert(Subtarget.getXLen() == 64); 10971 assert(getMinCmpXchgSizeInBits() == 32); 10972 assert(Subtarget.hasStdExtA()); 10973 return 33; 10974 } 10975 } 10976 } 10977 10978 return 1; 10979 } 10980 10981 const Constant * 10982 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const { 10983 assert(Ld && "Unexpected null LoadSDNode"); 10984 if (!ISD::isNormalLoad(Ld)) 10985 return nullptr; 10986 10987 SDValue Ptr = Ld->getBasePtr(); 10988 10989 // Only constant pools with no offset are supported. 10990 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * { 10991 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); 10992 if (!CNode || CNode->isMachineConstantPoolEntry() || 10993 CNode->getOffset() != 0) 10994 return nullptr; 10995 10996 return CNode; 10997 }; 10998 10999 // Simple case, LLA. 11000 if (Ptr.getOpcode() == RISCVISD::LLA) { 11001 auto *CNode = GetSupportedConstantPool(Ptr); 11002 if (!CNode || CNode->getTargetFlags() != 0) 11003 return nullptr; 11004 11005 return CNode->getConstVal(); 11006 } 11007 11008 // Look for a HI and ADD_LO pair. 
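  // That is, the (ADD_LO (HI sym), sym) form, which typically corresponds to
  // a LUI of %hi(sym) followed by an ADDI of %lo(sym) addressing the constant
  // pool entry.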
11009 if (Ptr.getOpcode() != RISCVISD::ADD_LO || 11010 Ptr.getOperand(0).getOpcode() != RISCVISD::HI) 11011 return nullptr; 11012 11013 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1)); 11014 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0)); 11015 11016 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO || 11017 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI) 11018 return nullptr; 11019 11020 if (CNodeLo->getConstVal() != CNodeHi->getConstVal()) 11021 return nullptr; 11022 11023 return CNodeLo->getConstVal(); 11024 } 11025 11026 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 11027 MachineBasicBlock *BB) { 11028 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 11029 11030 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 11031 // Should the count have wrapped while it was being read, we need to try 11032 // again. 11033 // ... 11034 // read: 11035 // rdcycleh x3 # load high word of cycle 11036 // rdcycle x2 # load low word of cycle 11037 // rdcycleh x4 # load high word of cycle 11038 // bne x3, x4, read # check if high word reads match, otherwise try again 11039 // ... 11040 11041 MachineFunction &MF = *BB->getParent(); 11042 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 11043 MachineFunction::iterator It = ++BB->getIterator(); 11044 11045 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 11046 MF.insert(It, LoopMBB); 11047 11048 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 11049 MF.insert(It, DoneMBB); 11050 11051 // Transfer the remainder of BB and its successor edges to DoneMBB. 11052 DoneMBB->splice(DoneMBB->begin(), BB, 11053 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 11054 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 11055 11056 BB->addSuccessor(LoopMBB); 11057 11058 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 11059 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 11060 Register LoReg = MI.getOperand(0).getReg(); 11061 Register HiReg = MI.getOperand(1).getReg(); 11062 DebugLoc DL = MI.getDebugLoc(); 11063 11064 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 11065 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 11066 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 11067 .addReg(RISCV::X0); 11068 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 11069 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 11070 .addReg(RISCV::X0); 11071 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 11072 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 11073 .addReg(RISCV::X0); 11074 11075 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 11076 .addReg(HiReg) 11077 .addReg(ReadAgainReg) 11078 .addMBB(LoopMBB); 11079 11080 LoopMBB->addSuccessor(LoopMBB); 11081 LoopMBB->addSuccessor(DoneMBB); 11082 11083 MI.eraseFromParent(); 11084 11085 return DoneMBB; 11086 } 11087 11088 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 11089 MachineBasicBlock *BB) { 11090 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 11091 11092 MachineFunction &MF = *BB->getParent(); 11093 DebugLoc DL = MI.getDebugLoc(); 11094 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 11095 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 11096 Register LoReg = MI.getOperand(0).getReg(); 11097 Register HiReg = MI.getOperand(1).getReg(); 11098 Register SrcReg = MI.getOperand(2).getReg(); 11099 const 
TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 11100 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 11101 11102 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 11103 RI, Register()); 11104 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 11105 MachineMemOperand *MMOLo = 11106 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 11107 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 11108 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 11109 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 11110 .addFrameIndex(FI) 11111 .addImm(0) 11112 .addMemOperand(MMOLo); 11113 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 11114 .addFrameIndex(FI) 11115 .addImm(4) 11116 .addMemOperand(MMOHi); 11117 MI.eraseFromParent(); // The pseudo instruction is gone now. 11118 return BB; 11119 } 11120 11121 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 11122 MachineBasicBlock *BB) { 11123 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 11124 "Unexpected instruction"); 11125 11126 MachineFunction &MF = *BB->getParent(); 11127 DebugLoc DL = MI.getDebugLoc(); 11128 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 11129 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 11130 Register DstReg = MI.getOperand(0).getReg(); 11131 Register LoReg = MI.getOperand(1).getReg(); 11132 Register HiReg = MI.getOperand(2).getReg(); 11133 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 11134 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 11135 11136 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 11137 MachineMemOperand *MMOLo = 11138 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 11139 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 11140 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 11141 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 11142 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 11143 .addFrameIndex(FI) 11144 .addImm(0) 11145 .addMemOperand(MMOLo); 11146 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 11147 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 11148 .addFrameIndex(FI) 11149 .addImm(4) 11150 .addMemOperand(MMOHi); 11151 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register()); 11152 MI.eraseFromParent(); // The pseudo instruction is gone now. 11153 return BB; 11154 } 11155 11156 static bool isSelectPseudo(MachineInstr &MI) { 11157 switch (MI.getOpcode()) { 11158 default: 11159 return false; 11160 case RISCV::Select_GPR_Using_CC_GPR: 11161 case RISCV::Select_FPR16_Using_CC_GPR: 11162 case RISCV::Select_FPR32_Using_CC_GPR: 11163 case RISCV::Select_FPR64_Using_CC_GPR: 11164 return true; 11165 } 11166 } 11167 11168 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, 11169 unsigned RelOpcode, unsigned EqOpcode, 11170 const RISCVSubtarget &Subtarget) { 11171 DebugLoc DL = MI.getDebugLoc(); 11172 Register DstReg = MI.getOperand(0).getReg(); 11173 Register Src1Reg = MI.getOperand(1).getReg(); 11174 Register Src2Reg = MI.getOperand(2).getReg(); 11175 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 11176 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass); 11177 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 11178 11179 // Save the current FFLAGS. 
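  // For reference, the overall expansion is roughly (single-precision case):
  //   frflags t0
  //   flt.s   rd, rs1, rs2
  //   fsflags t0
  //   feq.s   zero, rs1, rs2
  // FLT would raise the invalid flag for any NaN operand, so its flag updates
  // are discarded; the trailing FEQ then re-raises invalid only for signaling
  // NaNs, giving the quiet-compare semantics these pseudos are meant to have.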
11180 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags); 11181 11182 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg) 11183 .addReg(Src1Reg) 11184 .addReg(Src2Reg); 11185 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 11186 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 11187 11188 // Restore the FFLAGS. 11189 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 11190 .addReg(SavedFFlags, RegState::Kill); 11191 11192 // Issue a dummy FEQ opcode to raise exception for signaling NaNs. 11193 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0) 11194 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill())) 11195 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill())); 11196 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 11197 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept); 11198 11199 // Erase the pseudoinstruction. 11200 MI.eraseFromParent(); 11201 return BB; 11202 } 11203 11204 static MachineBasicBlock * 11205 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, 11206 MachineBasicBlock *ThisMBB, 11207 const RISCVSubtarget &Subtarget) { 11208 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5) 11209 // Without this, custom-inserter would have generated: 11210 // 11211 // A 11212 // | \ 11213 // | B 11214 // | / 11215 // C 11216 // | \ 11217 // | D 11218 // | / 11219 // E 11220 // 11221 // A: X = ...; Y = ... 11222 // B: empty 11223 // C: Z = PHI [X, A], [Y, B] 11224 // D: empty 11225 // E: PHI [X, C], [Z, D] 11226 // 11227 // If we lower both Select_FPRX_ in a single step, we can instead generate: 11228 // 11229 // A 11230 // | \ 11231 // | C 11232 // | /| 11233 // |/ | 11234 // | | 11235 // | D 11236 // | / 11237 // E 11238 // 11239 // A: X = ...; Y = ... 11240 // D: empty 11241 // E: PHI [X, A], [X, C], [Y, D] 11242 11243 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 11244 const DebugLoc &DL = First.getDebugLoc(); 11245 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); 11246 MachineFunction *F = ThisMBB->getParent(); 11247 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB); 11248 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB); 11249 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 11250 MachineFunction::iterator It = ++ThisMBB->getIterator(); 11251 F->insert(It, FirstMBB); 11252 F->insert(It, SecondMBB); 11253 F->insert(It, SinkMBB); 11254 11255 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. 11256 SinkMBB->splice(SinkMBB->begin(), ThisMBB, 11257 std::next(MachineBasicBlock::iterator(First)), 11258 ThisMBB->end()); 11259 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); 11260 11261 // Fallthrough block for ThisMBB. 11262 ThisMBB->addSuccessor(FirstMBB); 11263 // Fallthrough block for FirstMBB. 11264 FirstMBB->addSuccessor(SecondMBB); 11265 ThisMBB->addSuccessor(SinkMBB); 11266 FirstMBB->addSuccessor(SinkMBB); 11267 // This is fallthrough. 11268 SecondMBB->addSuccessor(SinkMBB); 11269 11270 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm()); 11271 Register FLHS = First.getOperand(1).getReg(); 11272 Register FRHS = First.getOperand(2).getReg(); 11273 // Insert appropriate branch. 
11274 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC)) 11275 .addReg(FLHS) 11276 .addReg(FRHS) 11277 .addMBB(SinkMBB); 11278 11279 Register SLHS = Second.getOperand(1).getReg(); 11280 Register SRHS = Second.getOperand(2).getReg(); 11281 Register Op1Reg4 = First.getOperand(4).getReg(); 11282 Register Op1Reg5 = First.getOperand(5).getReg(); 11283 11284 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm()); 11285 // Insert appropriate branch. 11286 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC)) 11287 .addReg(SLHS) 11288 .addReg(SRHS) 11289 .addMBB(SinkMBB); 11290 11291 Register DestReg = Second.getOperand(0).getReg(); 11292 Register Op2Reg4 = Second.getOperand(4).getReg(); 11293 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg) 11294 .addReg(Op2Reg4) 11295 .addMBB(ThisMBB) 11296 .addReg(Op1Reg4) 11297 .addMBB(FirstMBB) 11298 .addReg(Op1Reg5) 11299 .addMBB(SecondMBB); 11300 11301 // Now remove the Select_FPRX_s. 11302 First.eraseFromParent(); 11303 Second.eraseFromParent(); 11304 return SinkMBB; 11305 } 11306 11307 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 11308 MachineBasicBlock *BB, 11309 const RISCVSubtarget &Subtarget) { 11310 // To "insert" Select_* instructions, we actually have to insert the triangle 11311 // control-flow pattern. The incoming instructions know the destination vreg 11312 // to set, the condition code register to branch on, the true/false values to 11313 // select between, and the condcode to use to select the appropriate branch. 11314 // 11315 // We produce the following control flow: 11316 // HeadMBB 11317 // | \ 11318 // | IfFalseMBB 11319 // | / 11320 // TailMBB 11321 // 11322 // When we find a sequence of selects we attempt to optimize their emission 11323 // by sharing the control flow. Currently we only handle cases where we have 11324 // multiple selects with the exact same condition (same LHS, RHS and CC). 11325 // The selects may be interleaved with other instructions if the other 11326 // instructions meet some requirements we deem safe: 11327 // - They are not pseudo instructions. 11328 // - They are debug instructions. Otherwise, 11329 // - They do not have side-effects, do not access memory and their inputs do 11330 // not depend on the results of the select pseudo-instructions. 11331 // The TrueV/FalseV operands of the selects cannot depend on the result of 11332 // previous selects in the sequence. 11333 // These conditions could be further relaxed. See the X86 target for a 11334 // related approach and more information. 11335 // 11336 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)) 11337 // is checked here and handled by a separate function - 11338 // EmitLoweredCascadedSelect. 
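  // For example (illustrative), two adjacent selects that compare the same
  // operands with the same condition:
  //   %a = Select_GPR_Using_CC_GPR %x, %y, CC, %t0, %f0
  //   %b = Select_GPR_Using_CC_GPR %x, %y, CC, %t1, %f1
  // share one conditional branch, and TailMBB then holds two PHIs choosing
  // between (%t0, %f0) and (%t1, %f1).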
11339 Register LHS = MI.getOperand(1).getReg(); 11340 Register RHS = MI.getOperand(2).getReg(); 11341 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 11342 11343 SmallVector<MachineInstr *, 4> SelectDebugValues; 11344 SmallSet<Register, 4> SelectDests; 11345 SelectDests.insert(MI.getOperand(0).getReg()); 11346 11347 MachineInstr *LastSelectPseudo = &MI; 11348 auto Next = next_nodbg(MI.getIterator(), BB->instr_end()); 11349 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() && 11350 Next->getOpcode() == MI.getOpcode() && 11351 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() && 11352 Next->getOperand(5).isKill()) { 11353 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget); 11354 } 11355 11356 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 11357 SequenceMBBI != E; ++SequenceMBBI) { 11358 if (SequenceMBBI->isDebugInstr()) 11359 continue; 11360 if (isSelectPseudo(*SequenceMBBI)) { 11361 if (SequenceMBBI->getOperand(1).getReg() != LHS || 11362 SequenceMBBI->getOperand(2).getReg() != RHS || 11363 SequenceMBBI->getOperand(3).getImm() != CC || 11364 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 11365 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 11366 break; 11367 LastSelectPseudo = &*SequenceMBBI; 11368 SequenceMBBI->collectDebugValues(SelectDebugValues); 11369 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 11370 continue; 11371 } 11372 if (SequenceMBBI->hasUnmodeledSideEffects() || 11373 SequenceMBBI->mayLoadOrStore() || 11374 SequenceMBBI->usesCustomInsertionHook()) 11375 break; 11376 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 11377 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 11378 })) 11379 break; 11380 } 11381 11382 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 11383 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 11384 DebugLoc DL = MI.getDebugLoc(); 11385 MachineFunction::iterator I = ++BB->getIterator(); 11386 11387 MachineBasicBlock *HeadMBB = BB; 11388 MachineFunction *F = BB->getParent(); 11389 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 11390 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 11391 11392 F->insert(I, IfFalseMBB); 11393 F->insert(I, TailMBB); 11394 11395 // Transfer debug instructions associated with the selects to TailMBB. 11396 for (MachineInstr *DebugInstr : SelectDebugValues) { 11397 TailMBB->push_back(DebugInstr->removeFromParent()); 11398 } 11399 11400 // Move all instructions after the sequence to TailMBB. 11401 TailMBB->splice(TailMBB->end(), HeadMBB, 11402 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 11403 // Update machine-CFG edges by transferring all successors of the current 11404 // block to the new block which will contain the Phi nodes for the selects. 11405 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 11406 // Set the successors for HeadMBB. 11407 HeadMBB->addSuccessor(IfFalseMBB); 11408 HeadMBB->addSuccessor(TailMBB); 11409 11410 // Insert appropriate branch. 11411 BuildMI(HeadMBB, DL, TII.getBrCond(CC)) 11412 .addReg(LHS) 11413 .addReg(RHS) 11414 .addMBB(TailMBB); 11415 11416 // IfFalseMBB just falls through to TailMBB. 11417 IfFalseMBB->addSuccessor(TailMBB); 11418 11419 // Create PHIs for all of the select pseudo-instructions. 
11420 auto SelectMBBI = MI.getIterator(); 11421 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 11422 auto InsertionPoint = TailMBB->begin(); 11423 while (SelectMBBI != SelectEnd) { 11424 auto Next = std::next(SelectMBBI); 11425 if (isSelectPseudo(*SelectMBBI)) { 11426 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 11427 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 11428 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 11429 .addReg(SelectMBBI->getOperand(4).getReg()) 11430 .addMBB(HeadMBB) 11431 .addReg(SelectMBBI->getOperand(5).getReg()) 11432 .addMBB(IfFalseMBB); 11433 SelectMBBI->eraseFromParent(); 11434 } 11435 SelectMBBI = Next; 11436 } 11437 11438 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 11439 return TailMBB; 11440 } 11441 11442 static MachineBasicBlock * 11443 emitVFCVT_RM_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) { 11444 DebugLoc DL = MI.getDebugLoc(); 11445 11446 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 11447 11448 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 11449 Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass); 11450 11451 // Update FRM and save the old value. 11452 BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM) 11453 .addImm(MI.getOperand(4).getImm()); 11454 11455 // Emit an VFCVT without the FRM operand. 11456 assert(MI.getNumOperands() == 8); 11457 auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode)) 11458 .add(MI.getOperand(0)) 11459 .add(MI.getOperand(1)) 11460 .add(MI.getOperand(2)) 11461 .add(MI.getOperand(3)) 11462 .add(MI.getOperand(5)) 11463 .add(MI.getOperand(6)) 11464 .add(MI.getOperand(7)); 11465 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 11466 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 11467 11468 // Restore FRM. 11469 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM)) 11470 .addReg(SavedFRM, RegState::Kill); 11471 11472 // Erase the pseudoinstruction. 11473 MI.eraseFromParent(); 11474 return BB; 11475 } 11476 11477 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, 11478 MachineBasicBlock *BB, 11479 unsigned CVTXOpc, 11480 unsigned CVTFOpc) { 11481 DebugLoc DL = MI.getDebugLoc(); 11482 11483 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 11484 11485 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 11486 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass); 11487 11488 // Save the old value of FFLAGS. 11489 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS); 11490 11491 assert(MI.getNumOperands() == 7); 11492 11493 // Emit a VFCVT_X_F 11494 const TargetRegisterInfo *TRI = 11495 BB->getParent()->getSubtarget().getRegisterInfo(); 11496 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI); 11497 Register Tmp = MRI.createVirtualRegister(RC); 11498 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp) 11499 .add(MI.getOperand(1)) 11500 .add(MI.getOperand(2)) 11501 .add(MI.getOperand(3)) 11502 .add(MI.getOperand(4)) 11503 .add(MI.getOperand(5)) 11504 .add(MI.getOperand(6)); 11505 11506 // Emit a VFCVT_F_X 11507 BuildMI(*BB, MI, DL, TII.get(CVTFOpc)) 11508 .add(MI.getOperand(0)) 11509 .add(MI.getOperand(1)) 11510 .addReg(Tmp) 11511 .add(MI.getOperand(3)) 11512 .add(MI.getOperand(4)) 11513 .add(MI.getOperand(5)) 11514 .add(MI.getOperand(6)); 11515 11516 // Restore FFLAGS. 
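  // Restoring FFLAGS discards any inexact/invalid flags the two conversions
  // may have raised, which is what makes this expansion usable for rounding
  // operations that must not set exception flags (nearbyint-style lowerings).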
11517 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS)) 11518 .addReg(SavedFFLAGS, RegState::Kill); 11519 11520 // Erase the pseudoinstruction. 11521 MI.eraseFromParent(); 11522 return BB; 11523 } 11524 11525 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, 11526 const RISCVSubtarget &Subtarget) { 11527 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc; 11528 const TargetRegisterClass *RC; 11529 switch (MI.getOpcode()) { 11530 default: 11531 llvm_unreachable("Unexpected opcode"); 11532 case RISCV::PseudoFROUND_H: 11533 CmpOpc = RISCV::FLT_H; 11534 F2IOpc = RISCV::FCVT_W_H; 11535 I2FOpc = RISCV::FCVT_H_W; 11536 FSGNJOpc = RISCV::FSGNJ_H; 11537 FSGNJXOpc = RISCV::FSGNJX_H; 11538 RC = &RISCV::FPR16RegClass; 11539 break; 11540 case RISCV::PseudoFROUND_S: 11541 CmpOpc = RISCV::FLT_S; 11542 F2IOpc = RISCV::FCVT_W_S; 11543 I2FOpc = RISCV::FCVT_S_W; 11544 FSGNJOpc = RISCV::FSGNJ_S; 11545 FSGNJXOpc = RISCV::FSGNJX_S; 11546 RC = &RISCV::FPR32RegClass; 11547 break; 11548 case RISCV::PseudoFROUND_D: 11549 assert(Subtarget.is64Bit() && "Expected 64-bit GPR."); 11550 CmpOpc = RISCV::FLT_D; 11551 F2IOpc = RISCV::FCVT_L_D; 11552 I2FOpc = RISCV::FCVT_D_L; 11553 FSGNJOpc = RISCV::FSGNJ_D; 11554 FSGNJXOpc = RISCV::FSGNJX_D; 11555 RC = &RISCV::FPR64RegClass; 11556 break; 11557 } 11558 11559 const BasicBlock *BB = MBB->getBasicBlock(); 11560 DebugLoc DL = MI.getDebugLoc(); 11561 MachineFunction::iterator I = ++MBB->getIterator(); 11562 11563 MachineFunction *F = MBB->getParent(); 11564 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB); 11565 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB); 11566 11567 F->insert(I, CvtMBB); 11568 F->insert(I, DoneMBB); 11569 // Move all instructions after the sequence to DoneMBB. 11570 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI), 11571 MBB->end()); 11572 // Update machine-CFG edges by transferring all successors of the current 11573 // block to the new block which will contain the Phi nodes for the selects. 11574 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB); 11575 // Set the successors for MBB. 11576 MBB->addSuccessor(CvtMBB); 11577 MBB->addSuccessor(DoneMBB); 11578 11579 Register DstReg = MI.getOperand(0).getReg(); 11580 Register SrcReg = MI.getOperand(1).getReg(); 11581 Register MaxReg = MI.getOperand(2).getReg(); 11582 int64_t FRM = MI.getOperand(3).getImm(); 11583 11584 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 11585 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 11586 11587 Register FabsReg = MRI.createVirtualRegister(RC); 11588 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg); 11589 11590 // Compare the FP value to the max value. 11591 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 11592 auto MIB = 11593 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg); 11594 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 11595 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 11596 11597 // Insert branch. 11598 BuildMI(MBB, DL, TII.get(RISCV::BEQ)) 11599 .addReg(CmpReg) 11600 .addReg(RISCV::X0) 11601 .addMBB(DoneMBB); 11602 11603 CvtMBB->addSuccessor(DoneMBB); 11604 11605 // Convert to integer. 11606 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 11607 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM); 11608 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 11609 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 11610 11611 // Convert back to FP. 
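  // Since the FLT guard above only reaches this block when |Src| is below the
  // max-magnitude operand (expected to be 2^(mantissa bits) for the type),
  // the integer conversion cannot overflow and this back-conversion is exact.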
11612 Register I2FReg = MRI.createVirtualRegister(RC); 11613 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM); 11614 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept)) 11615 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept); 11616 11617 // Restore the sign bit. 11618 Register CvtReg = MRI.createVirtualRegister(RC); 11619 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg); 11620 11621 // Merge the results. 11622 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg) 11623 .addReg(SrcReg) 11624 .addMBB(MBB) 11625 .addReg(CvtReg) 11626 .addMBB(CvtMBB); 11627 11628 MI.eraseFromParent(); 11629 return DoneMBB; 11630 } 11631 11632 MachineBasicBlock * 11633 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 11634 MachineBasicBlock *BB) const { 11635 switch (MI.getOpcode()) { 11636 default: 11637 llvm_unreachable("Unexpected instr type to insert"); 11638 case RISCV::ReadCycleWide: 11639 assert(!Subtarget.is64Bit() && 11640 "ReadCycleWrite is only to be used on riscv32"); 11641 return emitReadCycleWidePseudo(MI, BB); 11642 case RISCV::Select_GPR_Using_CC_GPR: 11643 case RISCV::Select_FPR16_Using_CC_GPR: 11644 case RISCV::Select_FPR32_Using_CC_GPR: 11645 case RISCV::Select_FPR64_Using_CC_GPR: 11646 return emitSelectPseudo(MI, BB, Subtarget); 11647 case RISCV::BuildPairF64Pseudo: 11648 return emitBuildPairF64Pseudo(MI, BB); 11649 case RISCV::SplitF64Pseudo: 11650 return emitSplitF64Pseudo(MI, BB); 11651 case RISCV::PseudoQuietFLE_H: 11652 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget); 11653 case RISCV::PseudoQuietFLT_H: 11654 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget); 11655 case RISCV::PseudoQuietFLE_S: 11656 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget); 11657 case RISCV::PseudoQuietFLT_S: 11658 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget); 11659 case RISCV::PseudoQuietFLE_D: 11660 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget); 11661 case RISCV::PseudoQuietFLT_D: 11662 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget); 11663 11664 // ========================================================================= 11665 // VFCVT 11666 // ========================================================================= 11667 11668 case RISCV::PseudoVFCVT_RM_X_F_V_M1_MASK: 11669 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK); 11670 case RISCV::PseudoVFCVT_RM_X_F_V_M2_MASK: 11671 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK); 11672 case RISCV::PseudoVFCVT_RM_X_F_V_M4_MASK: 11673 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK); 11674 case RISCV::PseudoVFCVT_RM_X_F_V_M8_MASK: 11675 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK); 11676 case RISCV::PseudoVFCVT_RM_X_F_V_MF2_MASK: 11677 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK); 11678 case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK: 11679 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK); 11680 11681 case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK: 11682 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK); 11683 case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK: 11684 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M2_MASK); 11685 case RISCV::PseudoVFCVT_RM_XU_F_V_M4_MASK: 11686 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M4_MASK); 11687 case RISCV::PseudoVFCVT_RM_XU_F_V_M8_MASK: 11688 return emitVFCVT_RM_MASK(MI, BB, 
RISCV::PseudoVFCVT_XU_F_V_M8_MASK);
11689 case RISCV::PseudoVFCVT_RM_XU_F_V_MF2_MASK:
11690 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
11691 case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
11692 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
11693
11694 case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
11695 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
11696 case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
11697 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M2_MASK);
11698 case RISCV::PseudoVFCVT_RM_F_XU_V_M4_MASK:
11699 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M4_MASK);
11700 case RISCV::PseudoVFCVT_RM_F_XU_V_M8_MASK:
11701 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M8_MASK);
11702 case RISCV::PseudoVFCVT_RM_F_XU_V_MF2_MASK:
11703 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF2_MASK);
11704 case RISCV::PseudoVFCVT_RM_F_XU_V_MF4_MASK:
11705 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF4_MASK);
11706
11707 case RISCV::PseudoVFCVT_RM_F_X_V_M1_MASK:
11708 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M1_MASK);
11709 case RISCV::PseudoVFCVT_RM_F_X_V_M2_MASK:
11710 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M2_MASK);
11711 case RISCV::PseudoVFCVT_RM_F_X_V_M4_MASK:
11712 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M4_MASK);
11713 case RISCV::PseudoVFCVT_RM_F_X_V_M8_MASK:
11714 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M8_MASK);
11715 case RISCV::PseudoVFCVT_RM_F_X_V_MF2_MASK:
11716 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
11717 case RISCV::PseudoVFCVT_RM_F_X_V_MF4_MASK:
11718 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
11719
11720 // =========================================================================
11721 // VFWCVT
11722 // =========================================================================
11723
11724 case RISCV::PseudoVFWCVT_RM_XU_F_V_M1_MASK:
11725 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_M1_MASK);
11726 case RISCV::PseudoVFWCVT_RM_XU_F_V_M2_MASK:
11727 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_M2_MASK);
11728 case RISCV::PseudoVFWCVT_RM_XU_F_V_M4_MASK:
11729 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_M4_MASK);
11730 case RISCV::PseudoVFWCVT_RM_XU_F_V_MF2_MASK:
11731 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_MF2_MASK);
11732 case RISCV::PseudoVFWCVT_RM_XU_F_V_MF4_MASK:
11733 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_XU_F_V_MF4_MASK);
11734
11735 case RISCV::PseudoVFWCVT_RM_X_F_V_M1_MASK:
11736 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M1_MASK);
11737 case RISCV::PseudoVFWCVT_RM_X_F_V_M2_MASK:
11738 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M2_MASK);
11739 case RISCV::PseudoVFWCVT_RM_X_F_V_M4_MASK:
11740 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M4_MASK);
11741 case RISCV::PseudoVFWCVT_RM_X_F_V_MF2_MASK:
11742 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF2_MASK);
11743 case RISCV::PseudoVFWCVT_RM_X_F_V_MF4_MASK:
11744 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF4_MASK);
11745
11746 case RISCV::PseudoVFWCVT_RM_F_XU_V_M1_MASK:
11747 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M1_MASK);
11748 case RISCV::PseudoVFWCVT_RM_F_XU_V_M2_MASK:
11749 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M2_MASK);
11750 case RISCV::PseudoVFWCVT_RM_F_XU_V_M4_MASK:
11751 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M4_MASK);
11752 case RISCV::PseudoVFWCVT_RM_F_XU_V_MF2_MASK:
11753 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF2_MASK);
11754 case RISCV::PseudoVFWCVT_RM_F_XU_V_MF4_MASK:
11755 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF4_MASK);
11756 case RISCV::PseudoVFWCVT_RM_F_XU_V_MF8_MASK:
11757 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF8_MASK);
11758
11759 case RISCV::PseudoVFWCVT_RM_F_X_V_M1_MASK:
11760 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_M1_MASK);
11761 case RISCV::PseudoVFWCVT_RM_F_X_V_M2_MASK:
11762 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_M2_MASK);
11763 case RISCV::PseudoVFWCVT_RM_F_X_V_M4_MASK:
11764 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_M4_MASK);
11765 case RISCV::PseudoVFWCVT_RM_F_X_V_MF2_MASK:
11766 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_MF2_MASK);
11767 case RISCV::PseudoVFWCVT_RM_F_X_V_MF4_MASK:
11768 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_MF4_MASK);
11769 case RISCV::PseudoVFWCVT_RM_F_X_V_MF8_MASK:
11770 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_X_V_MF8_MASK);
11771
11772 // =========================================================================
11773 // VFNCVT
11774 // =========================================================================
11775
11776 case RISCV::PseudoVFNCVT_RM_XU_F_W_M1_MASK:
11777 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_M1_MASK);
11778 case RISCV::PseudoVFNCVT_RM_XU_F_W_M2_MASK:
11779 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_M2_MASK);
11780 case RISCV::PseudoVFNCVT_RM_XU_F_W_M4_MASK:
11781 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_M4_MASK);
11782 case RISCV::PseudoVFNCVT_RM_XU_F_W_MF2_MASK:
11783 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF2_MASK);
11784 case RISCV::PseudoVFNCVT_RM_XU_F_W_MF4_MASK:
11785 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF4_MASK);
11786 case RISCV::PseudoVFNCVT_RM_XU_F_W_MF8_MASK:
11787 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF8_MASK);
11788
11789 case RISCV::PseudoVFNCVT_RM_X_F_W_M1_MASK:
11790 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M1_MASK);
11791 case RISCV::PseudoVFNCVT_RM_X_F_W_M2_MASK:
11792 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M2_MASK);
11793 case RISCV::PseudoVFNCVT_RM_X_F_W_M4_MASK:
11794 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M4_MASK);
11795 case RISCV::PseudoVFNCVT_RM_X_F_W_MF2_MASK:
11796 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF2_MASK);
11797 case RISCV::PseudoVFNCVT_RM_X_F_W_MF4_MASK:
11798 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF4_MASK);
11799 case RISCV::PseudoVFNCVT_RM_X_F_W_MF8_MASK:
11800 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF8_MASK);
11801
11802 case RISCV::PseudoVFNCVT_RM_F_XU_W_M1_MASK:
11803 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M1_MASK);
11804 case RISCV::PseudoVFNCVT_RM_F_XU_W_M2_MASK:
11805 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M2_MASK);
11806 case RISCV::PseudoVFNCVT_RM_F_XU_W_M4_MASK:
11807 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M4_MASK);
11808 case RISCV::PseudoVFNCVT_RM_F_XU_W_MF2_MASK:
11809 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF2_MASK);
11810 case RISCV::PseudoVFNCVT_RM_F_XU_W_MF4_MASK:
11811 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF4_MASK);
11812
11813 case RISCV::PseudoVFNCVT_RM_F_X_W_M1_MASK:
11814 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_M1_MASK);
11815 case RISCV::PseudoVFNCVT_RM_F_X_W_M2_MASK:
11816 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_M2_MASK);
11817 case RISCV::PseudoVFNCVT_RM_F_X_W_M4_MASK:
11818 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_M4_MASK);
11819 case RISCV::PseudoVFNCVT_RM_F_X_W_MF2_MASK:
11820 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_MF2_MASK);
11821 case RISCV::PseudoVFNCVT_RM_F_X_W_MF4_MASK:
11822 return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_X_W_MF4_MASK);
11823
11824 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
11825 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
11826 RISCV::PseudoVFCVT_F_X_V_M1_MASK);
11827 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
11828 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
11829 RISCV::PseudoVFCVT_F_X_V_M2_MASK);
11830 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
11831 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
11832 RISCV::PseudoVFCVT_F_X_V_M4_MASK);
11833 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
11834 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
11835 RISCV::PseudoVFCVT_F_X_V_M8_MASK);
11836 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
11837 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
11838 RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
11839 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
11840 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
11841 RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
11842 case RISCV::PseudoFROUND_H:
11843 case RISCV::PseudoFROUND_S:
11844 case RISCV::PseudoFROUND_D:
11845 return emitFROUND(MI, BB, Subtarget);
11846 }
11847 }
11848
11849 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
11850 SDNode *Node) const {
11851 // Add FRM dependency to any instructions with dynamic rounding mode.
11852 unsigned Opc = MI.getOpcode();
11853 auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
11854 if (Idx < 0)
11855 return;
11856 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
11857 return;
11858 // If the instruction already reads FRM, don't add another read.
11859 if (MI.readsRegister(RISCV::FRM))
11860 return;
11861 MI.addOperand(
11862 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
11863 }
11864
11865 // Calling Convention Implementation.
11866 // The expectations for frontend ABI lowering vary from target to target.
11867 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
11868 // details, but this is a longer term goal. For now, we simply try to keep the
11869 // role of the frontend as simple and well-defined as possible. The rules can
11870 // be summarised as:
11871 // * Never split up large scalar arguments. We handle them here.
11872 // * If a hardfloat calling convention is being used, and the struct may be
11873 // passed in a pair of registers (fp+fp, int+fp), and both registers are
11874 // available, then pass as two separate arguments. If either the GPRs or FPRs
11875 // are exhausted, then pass according to the rule below.
11876 // * If a struct could never be passed in registers or directly in a stack
11877 // slot (as it is larger than 2*XLEN and the floating point rules don't
11878 // apply), then pass it using a pointer with the byval attribute.
11879 // * If a struct is less than 2*XLEN, then coerce to either a two-element 11880 // word-sized array or a 2*XLEN scalar (depending on alignment). 11881 // * The frontend can determine whether a struct is returned by reference or 11882 // not based on its size and fields. If it will be returned by reference, the 11883 // frontend must modify the prototype so a pointer with the sret annotation is 11884 // passed as the first argument. This is not necessary for large scalar 11885 // returns. 11886 // * Struct return values and varargs should be coerced to structs containing 11887 // register-size fields in the same situations they would be for fixed 11888 // arguments. 11889 11890 static const MCPhysReg ArgGPRs[] = { 11891 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 11892 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 11893 }; 11894 static const MCPhysReg ArgFPR16s[] = { 11895 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 11896 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 11897 }; 11898 static const MCPhysReg ArgFPR32s[] = { 11899 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 11900 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 11901 }; 11902 static const MCPhysReg ArgFPR64s[] = { 11903 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 11904 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 11905 }; 11906 // This is an interim calling convention and it may be changed in the future. 11907 static const MCPhysReg ArgVRs[] = { 11908 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 11909 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 11910 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 11911 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 11912 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 11913 RISCV::V20M2, RISCV::V22M2}; 11914 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 11915 RISCV::V20M4}; 11916 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 11917 11918 // Pass a 2*XLEN argument that has been split into two XLEN values through 11919 // registers or the stack as necessary. 11920 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 11921 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 11922 MVT ValVT2, MVT LocVT2, 11923 ISD::ArgFlagsTy ArgFlags2) { 11924 unsigned XLenInBytes = XLen / 8; 11925 if (Register Reg = State.AllocateReg(ArgGPRs)) { 11926 // At least one half can be passed via register. 11927 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 11928 VA1.getLocVT(), CCValAssign::Full)); 11929 } else { 11930 // Both halves must be passed on the stack, with proper alignment. 11931 Align StackAlign = 11932 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 11933 State.addLoc( 11934 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 11935 State.AllocateStack(XLenInBytes, StackAlign), 11936 VA1.getLocVT(), CCValAssign::Full)); 11937 State.addLoc(CCValAssign::getMem( 11938 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 11939 LocVT2, CCValAssign::Full)); 11940 return false; 11941 } 11942 11943 if (Register Reg = State.AllocateReg(ArgGPRs)) { 11944 // The second half can also be passed via register. 11945 State.addLoc( 11946 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 11947 } else { 11948 // The second half is passed via the stack, without additional alignment. 
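// For example, on RV32 an i64 argument whose low half landed in a7 has its
// high half assigned the next free 4-byte slot in the outgoing argument area.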
11949 State.addLoc(CCValAssign::getMem( 11950 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 11951 LocVT2, CCValAssign::Full)); 11952 } 11953 11954 return false; 11955 } 11956 11957 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, 11958 std::optional<unsigned> FirstMaskArgument, 11959 CCState &State, const RISCVTargetLowering &TLI) { 11960 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 11961 if (RC == &RISCV::VRRegClass) { 11962 // Assign the first mask argument to V0. 11963 // This is an interim calling convention and it may be changed in the 11964 // future. 11965 if (FirstMaskArgument && ValNo == *FirstMaskArgument) 11966 return State.AllocateReg(RISCV::V0); 11967 return State.AllocateReg(ArgVRs); 11968 } 11969 if (RC == &RISCV::VRM2RegClass) 11970 return State.AllocateReg(ArgVRM2s); 11971 if (RC == &RISCV::VRM4RegClass) 11972 return State.AllocateReg(ArgVRM4s); 11973 if (RC == &RISCV::VRM8RegClass) 11974 return State.AllocateReg(ArgVRM8s); 11975 llvm_unreachable("Unhandled register class for ValueType"); 11976 } 11977 11978 // Implements the RISC-V calling convention. Returns true upon failure. 11979 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 11980 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 11981 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 11982 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 11983 std::optional<unsigned> FirstMaskArgument) { 11984 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 11985 assert(XLen == 32 || XLen == 64); 11986 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 11987 11988 // Static chain parameter must not be passed in normal argument registers, 11989 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain 11990 if (ArgFlags.isNest()) { 11991 if (unsigned Reg = State.AllocateReg(RISCV::X7)) { 11992 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 11993 return false; 11994 } 11995 } 11996 11997 // Any return value split in to more than two values can't be returned 11998 // directly. Vectors are returned via the available vector registers. 11999 if (!LocVT.isVector() && IsRet && ValNo > 1) 12000 return true; 12001 12002 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 12003 // variadic argument, or if no F16/F32 argument registers are available. 12004 bool UseGPRForF16_F32 = true; 12005 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 12006 // variadic argument, or if no F64 argument registers are available. 12007 bool UseGPRForF64 = true; 12008 12009 switch (ABI) { 12010 default: 12011 llvm_unreachable("Unexpected ABI"); 12012 case RISCVABI::ABI_ILP32: 12013 case RISCVABI::ABI_LP64: 12014 break; 12015 case RISCVABI::ABI_ILP32F: 12016 case RISCVABI::ABI_LP64F: 12017 UseGPRForF16_F32 = !IsFixed; 12018 break; 12019 case RISCVABI::ABI_ILP32D: 12020 case RISCVABI::ABI_LP64D: 12021 UseGPRForF16_F32 = !IsFixed; 12022 UseGPRForF64 = !IsFixed; 12023 break; 12024 } 12025 12026 // FPR16, FPR32, and FPR64 alias each other. 12027 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) { 12028 UseGPRForF16_F32 = true; 12029 UseGPRForF64 = true; 12030 } 12031 12032 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 12033 // similar local variables rather than directly checking against the target 12034 // ABI. 
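// For example, under ILP32F a fixed f32 argument keeps LocVT == f32 and is
// assigned an FPR further down, whereas a variadic f32 (UseGPRForF16_F32) is
// bitcast to i32 below and competes for the GPRs instead.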
12035 12036 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 12037 LocVT = XLenVT; 12038 LocInfo = CCValAssign::BCvt; 12039 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 12040 LocVT = MVT::i64; 12041 LocInfo = CCValAssign::BCvt; 12042 } 12043 12044 // If this is a variadic argument, the RISC-V calling convention requires 12045 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 12046 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 12047 // be used regardless of whether the original argument was split during 12048 // legalisation or not. The argument will not be passed by registers if the 12049 // original type is larger than 2*XLEN, so the register alignment rule does 12050 // not apply. 12051 unsigned TwoXLenInBytes = (2 * XLen) / 8; 12052 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 12053 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 12054 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 12055 // Skip 'odd' register if necessary. 12056 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) 12057 State.AllocateReg(ArgGPRs); 12058 } 12059 12060 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 12061 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 12062 State.getPendingArgFlags(); 12063 12064 assert(PendingLocs.size() == PendingArgFlags.size() && 12065 "PendingLocs and PendingArgFlags out of sync"); 12066 12067 // Handle passing f64 on RV32D with a soft float ABI or when floating point 12068 // registers are exhausted. 12069 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 12070 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 12071 "Can't lower f64 if it is split"); 12072 // Depending on available argument GPRS, f64 may be passed in a pair of 12073 // GPRs, split between a GPR and the stack, or passed completely on the 12074 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 12075 // cases. 12076 Register Reg = State.AllocateReg(ArgGPRs); 12077 LocVT = MVT::i32; 12078 if (!Reg) { 12079 unsigned StackOffset = State.AllocateStack(8, Align(8)); 12080 State.addLoc( 12081 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 12082 return false; 12083 } 12084 if (!State.AllocateReg(ArgGPRs)) 12085 State.AllocateStack(4, Align(4)); 12086 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12087 return false; 12088 } 12089 12090 // Fixed-length vectors are located in the corresponding scalable-vector 12091 // container types. 12092 if (ValVT.isFixedLengthVector()) 12093 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 12094 12095 // Split arguments might be passed indirectly, so keep track of the pending 12096 // values. Split vectors are passed via a mix of registers and indirectly, so 12097 // treat them as we would any other argument. 12098 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 12099 LocVT = XLenVT; 12100 LocInfo = CCValAssign::Indirect; 12101 PendingLocs.push_back( 12102 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 12103 PendingArgFlags.push_back(ArgFlags); 12104 if (!ArgFlags.isSplitEnd()) { 12105 return false; 12106 } 12107 } 12108 12109 // If the split argument only had two elements, it should be passed directly 12110 // in registers or on the stack. 
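// For example, an i64 argument on RV32 is legalised into two i32 pieces: the
// pending first piece and this split-end piece are handed to
// CC_RISCVAssign2XLen so they land in a GPR pair, a GPR plus a stack slot, or
// two stack slots.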
12111 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 12112 PendingLocs.size() <= 2) { 12113 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 12114 // Apply the normal calling convention rules to the first half of the 12115 // split argument. 12116 CCValAssign VA = PendingLocs[0]; 12117 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 12118 PendingLocs.clear(); 12119 PendingArgFlags.clear(); 12120 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 12121 ArgFlags); 12122 } 12123 12124 // Allocate to a register if possible, or else a stack slot. 12125 Register Reg; 12126 unsigned StoreSizeBytes = XLen / 8; 12127 Align StackAlign = Align(XLen / 8); 12128 12129 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 12130 Reg = State.AllocateReg(ArgFPR16s); 12131 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 12132 Reg = State.AllocateReg(ArgFPR32s); 12133 else if (ValVT == MVT::f64 && !UseGPRForF64) 12134 Reg = State.AllocateReg(ArgFPR64s); 12135 else if (ValVT.isVector()) { 12136 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); 12137 if (!Reg) { 12138 // For return values, the vector must be passed fully via registers or 12139 // via the stack. 12140 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 12141 // but we're using all of them. 12142 if (IsRet) 12143 return true; 12144 // Try using a GPR to pass the address 12145 if ((Reg = State.AllocateReg(ArgGPRs))) { 12146 LocVT = XLenVT; 12147 LocInfo = CCValAssign::Indirect; 12148 } else if (ValVT.isScalableVector()) { 12149 LocVT = XLenVT; 12150 LocInfo = CCValAssign::Indirect; 12151 } else { 12152 // Pass fixed-length vectors on the stack. 12153 LocVT = ValVT; 12154 StoreSizeBytes = ValVT.getStoreSize(); 12155 // Align vectors to their element sizes, being careful for vXi1 12156 // vectors. 12157 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 12158 } 12159 } 12160 } else { 12161 Reg = State.AllocateReg(ArgGPRs); 12162 } 12163 12164 unsigned StackOffset = 12165 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 12166 12167 // If we reach this point and PendingLocs is non-empty, we must be at the 12168 // end of a split argument that must be passed indirectly. 12169 if (!PendingLocs.empty()) { 12170 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 12171 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 12172 12173 for (auto &It : PendingLocs) { 12174 if (Reg) 12175 It.convertToReg(Reg); 12176 else 12177 It.convertToMem(StackOffset); 12178 State.addLoc(It); 12179 } 12180 PendingLocs.clear(); 12181 PendingArgFlags.clear(); 12182 return false; 12183 } 12184 12185 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 12186 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && 12187 "Expected an XLenVT or vector types at this stage"); 12188 12189 if (Reg) { 12190 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12191 return false; 12192 } 12193 12194 // When a floating-point value is passed on the stack, no bit-conversion is 12195 // needed. 
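// For example, an f64 under LP64D that finds both the FPRs and the GPRs
// exhausted is written to its 8-byte stack slot as f64 directly, rather than
// as the bitcast i64 used on the register path.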
12196 if (ValVT.isFloatingPoint()) { 12197 LocVT = ValVT; 12198 LocInfo = CCValAssign::Full; 12199 } 12200 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 12201 return false; 12202 } 12203 12204 template <typename ArgTy> 12205 static std::optional<unsigned> preAssignMask(const ArgTy &Args) { 12206 for (const auto &ArgIdx : enumerate(Args)) { 12207 MVT ArgVT = ArgIdx.value().VT; 12208 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 12209 return ArgIdx.index(); 12210 } 12211 return std::nullopt; 12212 } 12213 12214 void RISCVTargetLowering::analyzeInputArgs( 12215 MachineFunction &MF, CCState &CCInfo, 12216 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 12217 RISCVCCAssignFn Fn) const { 12218 unsigned NumArgs = Ins.size(); 12219 FunctionType *FType = MF.getFunction().getFunctionType(); 12220 12221 std::optional<unsigned> FirstMaskArgument; 12222 if (Subtarget.hasVInstructions()) 12223 FirstMaskArgument = preAssignMask(Ins); 12224 12225 for (unsigned i = 0; i != NumArgs; ++i) { 12226 MVT ArgVT = Ins[i].VT; 12227 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 12228 12229 Type *ArgTy = nullptr; 12230 if (IsRet) 12231 ArgTy = FType->getReturnType(); 12232 else if (Ins[i].isOrigArg()) 12233 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 12234 12235 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 12236 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 12237 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 12238 FirstMaskArgument)) { 12239 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 12240 << EVT(ArgVT).getEVTString() << '\n'); 12241 llvm_unreachable(nullptr); 12242 } 12243 } 12244 } 12245 12246 void RISCVTargetLowering::analyzeOutputArgs( 12247 MachineFunction &MF, CCState &CCInfo, 12248 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 12249 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { 12250 unsigned NumArgs = Outs.size(); 12251 12252 std::optional<unsigned> FirstMaskArgument; 12253 if (Subtarget.hasVInstructions()) 12254 FirstMaskArgument = preAssignMask(Outs); 12255 12256 for (unsigned i = 0; i != NumArgs; i++) { 12257 MVT ArgVT = Outs[i].VT; 12258 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 12259 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 12260 12261 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 12262 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 12263 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 12264 FirstMaskArgument)) { 12265 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 12266 << EVT(ArgVT).getEVTString() << "\n"); 12267 llvm_unreachable(nullptr); 12268 } 12269 } 12270 } 12271 12272 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 12273 // values. 
12274 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 12275 const CCValAssign &VA, const SDLoc &DL, 12276 const RISCVSubtarget &Subtarget) { 12277 switch (VA.getLocInfo()) { 12278 default: 12279 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 12280 case CCValAssign::Full: 12281 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 12282 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 12283 break; 12284 case CCValAssign::BCvt: 12285 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 12286 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 12287 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 12288 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 12289 else 12290 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 12291 break; 12292 } 12293 return Val; 12294 } 12295 12296 // The caller is responsible for loading the full value if the argument is 12297 // passed with CCValAssign::Indirect. 12298 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 12299 const CCValAssign &VA, const SDLoc &DL, 12300 const ISD::InputArg &In, 12301 const RISCVTargetLowering &TLI) { 12302 MachineFunction &MF = DAG.getMachineFunction(); 12303 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 12304 EVT LocVT = VA.getLocVT(); 12305 SDValue Val; 12306 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 12307 Register VReg = RegInfo.createVirtualRegister(RC); 12308 RegInfo.addLiveIn(VA.getLocReg(), VReg); 12309 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 12310 12311 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass. 12312 if (In.isOrigArg()) { 12313 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex()); 12314 if (OrigArg->getType()->isIntegerTy()) { 12315 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); 12316 // An input zero extended from i31 can also be considered sign extended. 12317 if ((BitWidth <= 32 && In.Flags.isSExt()) || 12318 (BitWidth < 32 && In.Flags.isZExt())) { 12319 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 12320 RVFI->addSExt32Register(VReg); 12321 } 12322 } 12323 } 12324 12325 if (VA.getLocInfo() == CCValAssign::Indirect) 12326 return Val; 12327 12328 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 12329 } 12330 12331 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 12332 const CCValAssign &VA, const SDLoc &DL, 12333 const RISCVSubtarget &Subtarget) { 12334 EVT LocVT = VA.getLocVT(); 12335 12336 switch (VA.getLocInfo()) { 12337 default: 12338 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 12339 case CCValAssign::Full: 12340 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 12341 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 12342 break; 12343 case CCValAssign::BCvt: 12344 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 12345 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 12346 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 12347 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 12348 else 12349 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 12350 break; 12351 } 12352 return Val; 12353 } 12354 12355 // The caller is responsible for loading the full value if the argument is 12356 // passed with CCValAssign::Indirect. 
12357 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 12358 const CCValAssign &VA, const SDLoc &DL) { 12359 MachineFunction &MF = DAG.getMachineFunction(); 12360 MachineFrameInfo &MFI = MF.getFrameInfo(); 12361 EVT LocVT = VA.getLocVT(); 12362 EVT ValVT = VA.getValVT(); 12363 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 12364 if (ValVT.isScalableVector()) { 12365 // When the value is a scalable vector, we save the pointer which points to 12366 // the scalable vector value in the stack. The ValVT will be the pointer 12367 // type, instead of the scalable vector type. 12368 ValVT = LocVT; 12369 } 12370 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 12371 /*IsImmutable=*/true); 12372 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 12373 SDValue Val; 12374 12375 ISD::LoadExtType ExtType; 12376 switch (VA.getLocInfo()) { 12377 default: 12378 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 12379 case CCValAssign::Full: 12380 case CCValAssign::Indirect: 12381 case CCValAssign::BCvt: 12382 ExtType = ISD::NON_EXTLOAD; 12383 break; 12384 } 12385 Val = DAG.getExtLoad( 12386 ExtType, DL, LocVT, Chain, FIN, 12387 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 12388 return Val; 12389 } 12390 12391 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 12392 const CCValAssign &VA, const SDLoc &DL) { 12393 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 12394 "Unexpected VA"); 12395 MachineFunction &MF = DAG.getMachineFunction(); 12396 MachineFrameInfo &MFI = MF.getFrameInfo(); 12397 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 12398 12399 if (VA.isMemLoc()) { 12400 // f64 is passed on the stack. 12401 int FI = 12402 MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true); 12403 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 12404 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 12405 MachinePointerInfo::getFixedStack(MF, FI)); 12406 } 12407 12408 assert(VA.isRegLoc() && "Expected register VA assignment"); 12409 12410 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 12411 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 12412 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 12413 SDValue Hi; 12414 if (VA.getLocReg() == RISCV::X17) { 12415 // Second half of f64 is passed on the stack. 12416 int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true); 12417 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 12418 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 12419 MachinePointerInfo::getFixedStack(MF, FI)); 12420 } else { 12421 // Second half of f64 is passed in another GPR. 12422 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 12423 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); 12424 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 12425 } 12426 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 12427 } 12428 12429 // FastCC has less than 1% performance improvement for some particular 12430 // benchmark. But theoretically, it may has benenfit for some cases. 12431 static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, 12432 unsigned ValNo, MVT ValVT, MVT LocVT, 12433 CCValAssign::LocInfo LocInfo, 12434 ISD::ArgFlagsTy ArgFlags, CCState &State, 12435 bool IsFixed, bool IsRet, Type *OrigTy, 12436 const RISCVTargetLowering &TLI, 12437 std::optional<unsigned> FirstMaskArgument) { 12438 12439 // X5 and X6 might be used for save-restore libcall. 
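// Beyond a0-a7, FastCC also hands out the caller-saved temporaries t2 and
// t3-t6 (x7, x28-x31); t0 and t1 are deliberately omitted for the reason
// above.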
12440 static const MCPhysReg GPRList[] = { 12441 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 12442 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 12443 RISCV::X29, RISCV::X30, RISCV::X31}; 12444 12445 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 12446 if (unsigned Reg = State.AllocateReg(GPRList)) { 12447 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12448 return false; 12449 } 12450 } 12451 12452 if (LocVT == MVT::f16) { 12453 static const MCPhysReg FPR16List[] = { 12454 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 12455 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 12456 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 12457 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 12458 if (unsigned Reg = State.AllocateReg(FPR16List)) { 12459 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12460 return false; 12461 } 12462 } 12463 12464 if (LocVT == MVT::f32) { 12465 static const MCPhysReg FPR32List[] = { 12466 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 12467 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 12468 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 12469 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 12470 if (unsigned Reg = State.AllocateReg(FPR32List)) { 12471 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12472 return false; 12473 } 12474 } 12475 12476 if (LocVT == MVT::f64) { 12477 static const MCPhysReg FPR64List[] = { 12478 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 12479 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 12480 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 12481 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 12482 if (unsigned Reg = State.AllocateReg(FPR64List)) { 12483 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12484 return false; 12485 } 12486 } 12487 12488 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 12489 unsigned Offset4 = State.AllocateStack(4, Align(4)); 12490 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 12491 return false; 12492 } 12493 12494 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 12495 unsigned Offset5 = State.AllocateStack(8, Align(8)); 12496 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 12497 return false; 12498 } 12499 12500 if (LocVT.isVector()) { 12501 if (unsigned Reg = 12502 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { 12503 // Fixed-length vectors are located in the corresponding scalable-vector 12504 // container types. 12505 if (ValVT.isFixedLengthVector()) 12506 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 12507 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12508 } else { 12509 // Try and pass the address via a "fast" GPR. 
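// For example, a vector argument that cannot get a vector register is passed
// indirectly: its address takes one of the FastCC GPRs and the caller
// materialises the value in memory. Fixed-length vectors may instead fall
// back to a stack slot below; scalable vectors cannot.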
12510 if (unsigned GPRReg = State.AllocateReg(GPRList)) { 12511 LocInfo = CCValAssign::Indirect; 12512 LocVT = TLI.getSubtarget().getXLenVT(); 12513 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); 12514 } else if (ValVT.isFixedLengthVector()) { 12515 auto StackAlign = 12516 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 12517 unsigned StackOffset = 12518 State.AllocateStack(ValVT.getStoreSize(), StackAlign); 12519 State.addLoc( 12520 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 12521 } else { 12522 // Can't pass scalable vectors on the stack. 12523 return true; 12524 } 12525 } 12526 12527 return false; 12528 } 12529 12530 return true; // CC didn't match. 12531 } 12532 12533 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 12534 CCValAssign::LocInfo LocInfo, 12535 ISD::ArgFlagsTy ArgFlags, CCState &State) { 12536 12537 if (ArgFlags.isNest()) { 12538 report_fatal_error( 12539 "Attribute 'nest' is not supported in GHC calling convention"); 12540 } 12541 12542 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 12543 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 12544 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 12545 static const MCPhysReg GPRList[] = { 12546 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 12547 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 12548 if (unsigned Reg = State.AllocateReg(GPRList)) { 12549 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12550 return false; 12551 } 12552 } 12553 12554 if (LocVT == MVT::f32) { 12555 // Pass in STG registers: F1, ..., F6 12556 // fs0 ... fs5 12557 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 12558 RISCV::F18_F, RISCV::F19_F, 12559 RISCV::F20_F, RISCV::F21_F}; 12560 if (unsigned Reg = State.AllocateReg(FPR32List)) { 12561 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12562 return false; 12563 } 12564 } 12565 12566 if (LocVT == MVT::f64) { 12567 // Pass in STG registers: D1, ..., D6 12568 // fs6 ... fs11 12569 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 12570 RISCV::F24_D, RISCV::F25_D, 12571 RISCV::F26_D, RISCV::F27_D}; 12572 if (unsigned Reg = State.AllocateReg(FPR64List)) { 12573 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 12574 return false; 12575 } 12576 } 12577 12578 report_fatal_error("No registers left in GHC calling convention"); 12579 return true; 12580 } 12581 12582 // Transform physical registers into virtual registers. 
12583 SDValue RISCVTargetLowering::LowerFormalArguments( 12584 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 12585 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 12586 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 12587 12588 MachineFunction &MF = DAG.getMachineFunction(); 12589 12590 switch (CallConv) { 12591 default: 12592 report_fatal_error("Unsupported calling convention"); 12593 case CallingConv::C: 12594 case CallingConv::Fast: 12595 break; 12596 case CallingConv::GHC: 12597 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] || 12598 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD]) 12599 report_fatal_error( 12600 "GHC calling convention requires the F and D instruction set extensions"); 12601 } 12602 12603 const Function &Func = MF.getFunction(); 12604 if (Func.hasFnAttribute("interrupt")) { 12605 if (!Func.arg_empty()) 12606 report_fatal_error( 12607 "Functions with the interrupt attribute cannot have arguments!"); 12608 12609 StringRef Kind = 12610 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 12611 12612 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 12613 report_fatal_error( 12614 "Function interrupt attribute argument not supported!"); 12615 } 12616 12617 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 12618 MVT XLenVT = Subtarget.getXLenVT(); 12619 unsigned XLenInBytes = Subtarget.getXLen() / 8; 12620 // Used with vargs to acumulate store chains. 12621 std::vector<SDValue> OutChains; 12622 12623 // Assign locations to all of the incoming arguments. 12624 SmallVector<CCValAssign, 16> ArgLocs; 12625 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 12626 12627 if (CallConv == CallingConv::GHC) 12628 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC); 12629 else 12630 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, 12631 CallConv == CallingConv::Fast ? CC_RISCV_FastCC 12632 : CC_RISCV); 12633 12634 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 12635 CCValAssign &VA = ArgLocs[i]; 12636 SDValue ArgValue; 12637 // Passing f64 on RV32D with a soft float ABI must be handled as a special 12638 // case. 12639 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) 12640 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); 12641 else if (VA.isRegLoc()) 12642 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this); 12643 else 12644 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 12645 12646 if (VA.getLocInfo() == CCValAssign::Indirect) { 12647 // If the original argument was split and passed by reference (e.g. i128 12648 // on RV32), we need to load all parts of it here (using the same 12649 // address). Vectors may be partly split to registers and partly to the 12650 // stack, in which case the base address is partly offset and subsequent 12651 // stores are relative to that. 
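// For example, an i128 argument on RV32 arrives here as a single pointer in
// a GPR; this load and the loop below fetch each i32 part from consecutive
// offsets off that pointer. For scalable vector parts the recorded byte
// offset is additionally scaled by VSCALE.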
12652 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 12653 MachinePointerInfo())); 12654 unsigned ArgIndex = Ins[i].OrigArgIndex; 12655 unsigned ArgPartOffset = Ins[i].PartOffset; 12656 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 12657 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 12658 CCValAssign &PartVA = ArgLocs[i + 1]; 12659 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; 12660 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 12661 if (PartVA.getValVT().isScalableVector()) 12662 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 12663 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); 12664 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 12665 MachinePointerInfo())); 12666 ++i; 12667 } 12668 continue; 12669 } 12670 InVals.push_back(ArgValue); 12671 } 12672 12673 if (any_of(ArgLocs, 12674 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); })) 12675 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); 12676 12677 if (IsVarArg) { 12678 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); 12679 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 12680 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 12681 MachineFrameInfo &MFI = MF.getFrameInfo(); 12682 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 12683 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 12684 12685 // Offset of the first variable argument from stack pointer, and size of 12686 // the vararg save area. For now, the varargs save area is either zero or 12687 // large enough to hold a0-a7. 12688 int VaArgOffset, VarArgsSaveSize; 12689 12690 // If all registers are allocated, then all varargs must be passed on the 12691 // stack and we don't need to save any argregs. 12692 if (ArgRegs.size() == Idx) { 12693 VaArgOffset = CCInfo.getNextStackOffset(); 12694 VarArgsSaveSize = 0; 12695 } else { 12696 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 12697 VaArgOffset = -VarArgsSaveSize; 12698 } 12699 12700 // Record the frame index of the first variable argument 12701 // which is a value necessary to VASTART. 12702 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 12703 RVFI->setVarArgsFrameIndex(FI); 12704 12705 // If saving an odd number of registers then create an extra stack slot to 12706 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 12707 // offsets to even-numbered registered remain 2*XLEN-aligned. 12708 if (Idx % 2) { 12709 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); 12710 VarArgsSaveSize += XLenInBytes; 12711 } 12712 12713 // Copy the integer registers that may have been used for passing varargs 12714 // to the vararg save area. 
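// For example, for int printf(const char *, ...) on RV32 only a0 is consumed
// by a fixed argument, so a1-a7 are spilled immediately below the incoming
// stack arguments (plus one pad word, per the comment above), letting va_arg
// walk register and stack varargs contiguously.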
12715 for (unsigned I = Idx; I < ArgRegs.size(); 12716 ++I, VaArgOffset += XLenInBytes) { 12717 const Register Reg = RegInfo.createVirtualRegister(RC); 12718 RegInfo.addLiveIn(ArgRegs[I], Reg); 12719 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 12720 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 12721 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 12722 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 12723 MachinePointerInfo::getFixedStack(MF, FI)); 12724 cast<StoreSDNode>(Store.getNode()) 12725 ->getMemOperand() 12726 ->setValue((Value *)nullptr); 12727 OutChains.push_back(Store); 12728 } 12729 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 12730 } 12731 12732 // All stores are grouped in one node to allow the matching between 12733 // the size of Ins and InVals. This only happens for vararg functions. 12734 if (!OutChains.empty()) { 12735 OutChains.push_back(Chain); 12736 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 12737 } 12738 12739 return Chain; 12740 } 12741 12742 /// isEligibleForTailCallOptimization - Check whether the call is eligible 12743 /// for tail call optimization. 12744 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 12745 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 12746 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 12747 const SmallVector<CCValAssign, 16> &ArgLocs) const { 12748 12749 auto &Callee = CLI.Callee; 12750 auto CalleeCC = CLI.CallConv; 12751 auto &Outs = CLI.Outs; 12752 auto &Caller = MF.getFunction(); 12753 auto CallerCC = Caller.getCallingConv(); 12754 12755 // Exception-handling functions need a special set of instructions to 12756 // indicate a return to the hardware. Tail-calling another function would 12757 // probably break this. 12758 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 12759 // should be expanded as new function attributes are introduced. 12760 if (Caller.hasFnAttribute("interrupt")) 12761 return false; 12762 12763 // Do not tail call opt if the stack is used to pass parameters. 12764 if (CCInfo.getNextStackOffset() != 0) 12765 return false; 12766 12767 // Do not tail call opt if any parameters need to be passed indirectly. 12768 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 12769 // passed indirectly. So the address of the value will be passed in a 12770 // register, or if not available, then the address is put on the stack. In 12771 // order to pass indirectly, space on the stack often needs to be allocated 12772 // in order to store the value. In this case the CCInfo.getNextStackOffset() 12773 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 12774 // are passed CCValAssign::Indirect. 12775 for (auto &VA : ArgLocs) 12776 if (VA.getLocInfo() == CCValAssign::Indirect) 12777 return false; 12778 12779 // Do not tail call opt if either caller or callee uses struct return 12780 // semantics. 12781 auto IsCallerStructRet = Caller.hasStructRetAttr(); 12782 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 12783 if (IsCallerStructRet || IsCalleeStructRet) 12784 return false; 12785 12786 // Externally-defined functions with weak linkage should not be 12787 // tail-called. The behaviour of branch instructions in this situation (as 12788 // used for tail calls) is implementation-defined, so we cannot rely on the 12789 // linker replacing the tail call with a return. 
12790 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 12791 const GlobalValue *GV = G->getGlobal(); 12792 if (GV->hasExternalWeakLinkage()) 12793 return false; 12794 } 12795 12796 // The callee has to preserve all registers the caller needs to preserve. 12797 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 12798 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 12799 if (CalleeCC != CallerCC) { 12800 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 12801 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 12802 return false; 12803 } 12804 12805 // Byval parameters hand the function a pointer directly into the stack area 12806 // we want to reuse during a tail call. Working around this *is* possible 12807 // but less efficient and uglier in LowerCall. 12808 for (auto &Arg : Outs) 12809 if (Arg.Flags.isByVal()) 12810 return false; 12811 12812 return true; 12813 } 12814 12815 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 12816 return DAG.getDataLayout().getPrefTypeAlign( 12817 VT.getTypeForEVT(*DAG.getContext())); 12818 } 12819 12820 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 12821 // and output parameter nodes. 12822 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 12823 SmallVectorImpl<SDValue> &InVals) const { 12824 SelectionDAG &DAG = CLI.DAG; 12825 SDLoc &DL = CLI.DL; 12826 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 12827 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 12828 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 12829 SDValue Chain = CLI.Chain; 12830 SDValue Callee = CLI.Callee; 12831 bool &IsTailCall = CLI.IsTailCall; 12832 CallingConv::ID CallConv = CLI.CallConv; 12833 bool IsVarArg = CLI.IsVarArg; 12834 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 12835 MVT XLenVT = Subtarget.getXLenVT(); 12836 12837 MachineFunction &MF = DAG.getMachineFunction(); 12838 12839 // Analyze the operands of the call, assigning locations to each operand. 12840 SmallVector<CCValAssign, 16> ArgLocs; 12841 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 12842 12843 if (CallConv == CallingConv::GHC) 12844 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 12845 else 12846 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, 12847 CallConv == CallingConv::Fast ? CC_RISCV_FastCC 12848 : CC_RISCV); 12849 12850 // Check if it's really possible to do a tail call. 12851 if (IsTailCall) 12852 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 12853 12854 if (IsTailCall) 12855 ++NumTailCalls; 12856 else if (CLI.CB && CLI.CB->isMustTailCall()) 12857 report_fatal_error("failed to perform tail call elimination on a call " 12858 "site marked musttail"); 12859 12860 // Get a count of how many bytes are to be pushed on the stack. 
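// For a tail call this is necessarily zero: isEligibleForTailCallOptimization
// already rejected any call that needs stack space for its arguments.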
12861 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 12862 12863 // Create local copies for byval args 12864 SmallVector<SDValue, 8> ByValArgs; 12865 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 12866 ISD::ArgFlagsTy Flags = Outs[i].Flags; 12867 if (!Flags.isByVal()) 12868 continue; 12869 12870 SDValue Arg = OutVals[i]; 12871 unsigned Size = Flags.getByValSize(); 12872 Align Alignment = Flags.getNonZeroByValAlign(); 12873 12874 int FI = 12875 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 12876 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 12877 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 12878 12879 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 12880 /*IsVolatile=*/false, 12881 /*AlwaysInline=*/false, IsTailCall, 12882 MachinePointerInfo(), MachinePointerInfo()); 12883 ByValArgs.push_back(FIPtr); 12884 } 12885 12886 if (!IsTailCall) 12887 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 12888 12889 // Copy argument values to their designated locations. 12890 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 12891 SmallVector<SDValue, 8> MemOpChains; 12892 SDValue StackPtr; 12893 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 12894 CCValAssign &VA = ArgLocs[i]; 12895 SDValue ArgValue = OutVals[i]; 12896 ISD::ArgFlagsTy Flags = Outs[i].Flags; 12897 12898 // Handle passing f64 on RV32D with a soft float ABI as a special case. 12899 bool IsF64OnRV32DSoftABI = 12900 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 12901 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 12902 SDValue SplitF64 = DAG.getNode( 12903 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 12904 SDValue Lo = SplitF64.getValue(0); 12905 SDValue Hi = SplitF64.getValue(1); 12906 12907 Register RegLo = VA.getLocReg(); 12908 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 12909 12910 if (RegLo == RISCV::X17) { 12911 // Second half of f64 is passed on the stack. 12912 // Work out the address of the stack slot. 12913 if (!StackPtr.getNode()) 12914 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 12915 // Emit the store. 12916 MemOpChains.push_back( 12917 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 12918 } else { 12919 // Second half of f64 is passed in another GPR. 12920 assert(RegLo < RISCV::X31 && "Invalid register pair"); 12921 Register RegHigh = RegLo + 1; 12922 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 12923 } 12924 continue; 12925 } 12926 12927 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 12928 // as any other MemLoc. 12929 12930 // Promote the value if needed. 12931 // For now, only handle fully promoted and indirect arguments. 12932 if (VA.getLocInfo() == CCValAssign::Indirect) { 12933 // Store the argument in a stack slot and pass its address. 12934 Align StackAlign = 12935 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 12936 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 12937 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 12938 // If the original argument was split (e.g. i128), we need 12939 // to store the required parts of it here (and pass just one address). 12940 // Vectors may be partly split to registers and partly to the stack, in 12941 // which case the base address is partly offset and subsequent stores are 12942 // relative to that. 
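// For example, for an i128 argument on RV32 the four i32 parts collected in
// the loop below are stored into one aligned stack temporary and only its
// address is passed on (in a GPR if one is free, otherwise on the stack).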
12943 unsigned ArgIndex = Outs[i].OrigArgIndex; 12944 unsigned ArgPartOffset = Outs[i].PartOffset; 12945 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 12946 // Calculate the total size to store. We don't have access to what we're 12947 // actually storing other than performing the loop and collecting the 12948 // info. 12949 SmallVector<std::pair<SDValue, SDValue>> Parts; 12950 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 12951 SDValue PartValue = OutVals[i + 1]; 12952 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 12953 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 12954 EVT PartVT = PartValue.getValueType(); 12955 if (PartVT.isScalableVector()) 12956 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 12957 StoredSize += PartVT.getStoreSize(); 12958 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 12959 Parts.push_back(std::make_pair(PartValue, Offset)); 12960 ++i; 12961 } 12962 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 12963 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 12964 MemOpChains.push_back( 12965 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 12966 MachinePointerInfo::getFixedStack(MF, FI))); 12967 for (const auto &Part : Parts) { 12968 SDValue PartValue = Part.first; 12969 SDValue PartOffset = Part.second; 12970 SDValue Address = 12971 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 12972 MemOpChains.push_back( 12973 DAG.getStore(Chain, DL, PartValue, Address, 12974 MachinePointerInfo::getFixedStack(MF, FI))); 12975 } 12976 ArgValue = SpillSlot; 12977 } else { 12978 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 12979 } 12980 12981 // Use local copy if it is a byval arg. 12982 if (Flags.isByVal()) 12983 ArgValue = ByValArgs[j++]; 12984 12985 if (VA.isRegLoc()) { 12986 // Queue up the argument copies and emit them at the end. 12987 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 12988 } else { 12989 assert(VA.isMemLoc() && "Argument not register or memory"); 12990 assert(!IsTailCall && "Tail call not allowed if stack is used " 12991 "for passing parameters"); 12992 12993 // Work out the address of the stack slot. 12994 if (!StackPtr.getNode()) 12995 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 12996 SDValue Address = 12997 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 12998 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 12999 13000 // Emit the store. 13001 MemOpChains.push_back( 13002 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 13003 } 13004 } 13005 13006 // Join the stores, which are independent of one another. 13007 if (!MemOpChains.empty()) 13008 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 13009 13010 SDValue Glue; 13011 13012 // Build a sequence of copy-to-reg nodes, chained and glued together. 13013 for (auto &Reg : RegsToPass) { 13014 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 13015 Glue = Chain.getValue(1); 13016 } 13017 13018 // Validate that none of the argument registers have been marked as 13019 // reserved, if so report an error. Do the same for the return address if this 13020 // is not a tailcall. 
13021 validateCCReservedRegs(RegsToPass, MF); 13022 if (!IsTailCall && 13023 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 13024 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 13025 MF.getFunction(), 13026 "Return address register required, but has been reserved."}); 13027 13028 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 13029 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 13030 // split it and then direct call can be matched by PseudoCALL. 13031 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 13032 const GlobalValue *GV = S->getGlobal(); 13033 13034 unsigned OpFlags = RISCVII::MO_CALL; 13035 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 13036 OpFlags = RISCVII::MO_PLT; 13037 13038 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 13039 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 13040 unsigned OpFlags = RISCVII::MO_CALL; 13041 13042 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 13043 nullptr)) 13044 OpFlags = RISCVII::MO_PLT; 13045 13046 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 13047 } 13048 13049 // The first call operand is the chain and the second is the target address. 13050 SmallVector<SDValue, 8> Ops; 13051 Ops.push_back(Chain); 13052 Ops.push_back(Callee); 13053 13054 // Add argument registers to the end of the list so that they are 13055 // known live into the call. 13056 for (auto &Reg : RegsToPass) 13057 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 13058 13059 if (!IsTailCall) { 13060 // Add a register mask operand representing the call-preserved registers. 13061 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 13062 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 13063 assert(Mask && "Missing call preserved mask for calling convention"); 13064 Ops.push_back(DAG.getRegisterMask(Mask)); 13065 } 13066 13067 // Glue the call to the argument copies, if any. 13068 if (Glue.getNode()) 13069 Ops.push_back(Glue); 13070 13071 // Emit the call. 13072 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 13073 13074 if (IsTailCall) { 13075 MF.getFrameInfo().setHasTailCall(); 13076 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 13077 } 13078 13079 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 13080 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 13081 Glue = Chain.getValue(1); 13082 13083 // Mark the end of the call, which is glued to the call itself. 13084 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 13085 Glue = Chain.getValue(1); 13086 13087 // Assign locations to each value returned by this call. 13088 SmallVector<CCValAssign, 16> RVLocs; 13089 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 13090 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV); 13091 13092 // Copy all of the result registers out of their specified physreg. 
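// For example, an f64 returned under the RV32 soft-float ABI comes back as
// two i32 halves in a0/a1 and is reassembled with BuildPairF64 below.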
  for (auto &VA : RVLocs) {
    // Copy the value out
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
      SDValue RetValue2 =
          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
                             RetValue2);
    }

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

    InVals.push_back(RetValue);
  }

  return Chain;
}

bool RISCVTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  std::optional<unsigned> FirstMaskArgument;
  if (Subtarget.hasVInstructions())
    FirstMaskArgument = preAssignMask(Outs);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT VT = Outs[i].VT;
    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
    RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
    if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
                 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
                 *this, FirstMaskArgument))
      return false;
  }
  return true;
}

SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool IsVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_RISCV);

  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    SDValue Val = OutVals[i];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on RV32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      assert(RegLo < RISCV::X31 && "Invalid register pair");
      Register RegHi = RegLo + 1;

      if (STI.isRegisterReservedByUser(RegLo) ||
          STI.isRegisterReservedByUser(RegHi))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      if (STI.isRegisterReservedByUser(VA.getLocReg()))
        MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
            MF.getFunction(),
            "Return value register required, but has been reserved."});

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  if (any_of(RVLocs,
             [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
    MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();

  unsigned RetOpc = RISCVISD::RET_FLAG;
  // Interrupt service routines use different return instructions.
  const Function &Func = DAG.getMachineFunction().getFunction();
  if (Func.hasFnAttribute("interrupt")) {
    if (!Func.getReturnType()->isVoidTy())
      report_fatal_error(
          "Functions with the interrupt attribute must have void return type!");

    MachineFunction &MF = DAG.getMachineFunction();
    StringRef Kind =
        MF.getFunction().getFnAttribute("interrupt").getValueAsString();

    if (Kind == "user")
      RetOpc = RISCVISD::URET_FLAG;
    else if (Kind == "supervisor")
      RetOpc = RISCVISD::SRET_FLAG;
    else
      RetOpc = RISCVISD::MRET_FLAG;
  }

  return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}

void RISCVTargetLowering::validateCCReservedRegs(
    const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
    MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();

  if (llvm::any_of(Regs, [&STI](auto Reg) {
        return STI.isRegisterReservedByUser(Reg.first);
      }))
    F.getContext().diagnose(DiagnosticInfoUnsupported{
        F, "Argument register required, but has been reserved."});
}

// Check if the result of the node is only used as a return value, as
// otherwise we can't perform a tail-call.
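// Illustrative pattern this accepts (a rough sketch, not exhaustive): N has a
// single result with a single use, that use is an ISD::CopyToReg without a
// glue operand, and every user of the copy is a RISCVISD::RET_FLAG:
//
//   N --> CopyToReg(Chain, $x10, N) --> RISCVISD::RET_FLAG
//
// In that case the copy's incoming chain is returned through &Chain.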
bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *Copy = *N->use_begin();
  // TODO: Handle additional opcodes in order to support tail-calling libcalls
  // with soft float ABIs.
  if (Copy->getOpcode() != ISD::CopyToReg) {
    return false;
  }

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
    return false;

  // The copy must be used by a RISCVISD::RET_FLAG, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != RISCVISD::RET_FLAG)
      return false;
    HasRet = true;
  }
  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}

bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE)                                                   \
  case RISCVISD::NODE:                                                         \
    return "RISCVISD::" #NODE;
  // clang-format off
  switch ((RISCVISD::NodeType)Opcode) {
  case RISCVISD::FIRST_NUMBER:
    break;
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(URET_FLAG)
  NODE_NAME_CASE(SRET_FLAG)
  NODE_NAME_CASE(MRET_FLAG)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(SELECT_CC)
  NODE_NAME_CASE(BR_CC)
  NODE_NAME_CASE(BuildPairF64)
  NODE_NAME_CASE(SplitF64)
  NODE_NAME_CASE(TAIL)
  NODE_NAME_CASE(ADD_LO)
  NODE_NAME_CASE(HI)
  NODE_NAME_CASE(LLA)
  NODE_NAME_CASE(ADD_TPREL)
  NODE_NAME_CASE(LA)
  NODE_NAME_CASE(LA_TLS_IE)
  NODE_NAME_CASE(LA_TLS_GD)
  NODE_NAME_CASE(MULHSU)
  NODE_NAME_CASE(SLLW)
  NODE_NAME_CASE(SRAW)
  NODE_NAME_CASE(SRLW)
  NODE_NAME_CASE(DIVW)
  NODE_NAME_CASE(DIVUW)
  NODE_NAME_CASE(REMUW)
  NODE_NAME_CASE(ROLW)
  NODE_NAME_CASE(RORW)
  NODE_NAME_CASE(CLZW)
  NODE_NAME_CASE(CTZW)
  NODE_NAME_CASE(ABSW)
  NODE_NAME_CASE(FMV_H_X)
  NODE_NAME_CASE(FMV_X_ANYEXTH)
  NODE_NAME_CASE(FMV_X_SIGNEXTH)
  NODE_NAME_CASE(FMV_W_X_RV64)
  NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
  NODE_NAME_CASE(FCVT_X)
  NODE_NAME_CASE(FCVT_XU)
  NODE_NAME_CASE(FCVT_W_RV64)
  NODE_NAME_CASE(FCVT_WU_RV64)
  NODE_NAME_CASE(STRICT_FCVT_W_RV64)
  NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
  NODE_NAME_CASE(FROUND)
  NODE_NAME_CASE(READ_CYCLE_WIDE)
  NODE_NAME_CASE(BREV8)
  NODE_NAME_CASE(ORC_B)
  NODE_NAME_CASE(ZIP)
  NODE_NAME_CASE(UNZIP)
  NODE_NAME_CASE(VMV_V_X_VL)
  NODE_NAME_CASE(VFMV_V_F_VL)
  NODE_NAME_CASE(VMV_X_S)
  NODE_NAME_CASE(VMV_S_X_VL)
  NODE_NAME_CASE(VFMV_S_F_VL)
  NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
  NODE_NAME_CASE(READ_VLENB)
  NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
  NODE_NAME_CASE(VSLIDEUP_VL)
  NODE_NAME_CASE(VSLIDE1UP_VL)
  NODE_NAME_CASE(VSLIDEDOWN_VL)
  NODE_NAME_CASE(VSLIDE1DOWN_VL)
  NODE_NAME_CASE(VID_VL)
  NODE_NAME_CASE(VFNCVT_ROD_VL)
  NODE_NAME_CASE(VECREDUCE_ADD_VL)
  NODE_NAME_CASE(VECREDUCE_UMAX_VL)
  NODE_NAME_CASE(VECREDUCE_SMAX_VL)
  NODE_NAME_CASE(VECREDUCE_UMIN_VL)
  NODE_NAME_CASE(VECREDUCE_SMIN_VL)
  NODE_NAME_CASE(VECREDUCE_AND_VL)
  NODE_NAME_CASE(VECREDUCE_OR_VL)
  NODE_NAME_CASE(VECREDUCE_XOR_VL)
  NODE_NAME_CASE(VECREDUCE_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(SADDSAT_VL)
  NODE_NAME_CASE(UADDSAT_VL)
  NODE_NAME_CASE(SSUBSAT_VL)
  NODE_NAME_CASE(USUBSAT_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(FABS_VL)
  NODE_NAME_CASE(FSQRT_VL)
  NODE_NAME_CASE(VFMADD_VL)
  NODE_NAME_CASE(VFNMADD_VL)
  NODE_NAME_CASE(VFMSUB_VL)
  NODE_NAME_CASE(VFNMSUB_VL)
  NODE_NAME_CASE(FCOPYSIGN_VL)
  NODE_NAME_CASE(SMIN_VL)
  NODE_NAME_CASE(SMAX_VL)
  NODE_NAME_CASE(UMIN_VL)
  NODE_NAME_CASE(UMAX_VL)
  NODE_NAME_CASE(FMINNUM_VL)
  NODE_NAME_CASE(FMAXNUM_VL)
  NODE_NAME_CASE(MULHS_VL)
  NODE_NAME_CASE(MULHU_VL)
  NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
  NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
  NODE_NAME_CASE(VFCVT_RM_X_F_VL)
  NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
  NODE_NAME_CASE(VFCVT_X_F_VL)
  NODE_NAME_CASE(VFCVT_XU_F_VL)
  NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
  NODE_NAME_CASE(SINT_TO_FP_VL)
  NODE_NAME_CASE(UINT_TO_FP_VL)
  NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
  NODE_NAME_CASE(VFCVT_RM_F_X_VL)
  NODE_NAME_CASE(FP_EXTEND_VL)
  NODE_NAME_CASE(FP_ROUND_VL)
  NODE_NAME_CASE(VWMUL_VL)
  NODE_NAME_CASE(VWMULU_VL)
  NODE_NAME_CASE(VWMULSU_VL)
  NODE_NAME_CASE(VWADD_VL)
  NODE_NAME_CASE(VWADDU_VL)
  NODE_NAME_CASE(VWSUB_VL)
  NODE_NAME_CASE(VWSUBU_VL)
  NODE_NAME_CASE(VWADD_W_VL)
  NODE_NAME_CASE(VWADDU_W_VL)
  NODE_NAME_CASE(VWSUB_W_VL)
  NODE_NAME_CASE(VWSUBU_W_VL)
  NODE_NAME_CASE(VNSRL_VL)
  NODE_NAME_CASE(SETCC_VL)
  NODE_NAME_CASE(VSELECT_VL)
  NODE_NAME_CASE(VP_MERGE_VL)
  NODE_NAME_CASE(VMAND_VL)
  NODE_NAME_CASE(VMOR_VL)
  NODE_NAME_CASE(VMXOR_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VRGATHER_VX_VL)
  NODE_NAME_CASE(VRGATHER_VV_VL)
  NODE_NAME_CASE(VRGATHEREI16_VV_VL)
  NODE_NAME_CASE(VSEXT_VL)
  NODE_NAME_CASE(VZEXT_VL)
  NODE_NAME_CASE(VCPOP_VL)
  NODE_NAME_CASE(VFIRST_VL)
  NODE_NAME_CASE(READ_CSR)
  NODE_NAME_CASE(WRITE_CSR)
  NODE_NAME_CASE(SWAP_CSR)
  }
  // clang-format on
  return nullptr;
#undef NODE_NAME_CASE
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
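///
/// Summarizing the cases handled below and in LowerAsmOperandForConstraint
/// (a reference sketch; the code itself is authoritative):
///   'f'       - a floating-point register (FPR16/FPR32/FPR64 as available)
///   'I'       - a 12-bit signed immediate
///   'J'       - the immediate zero
///   'K'       - a 5-bit unsigned immediate
///   'A'       - a memory operand whose address is held in a register
///   'S'       - a symbolic address
///   "vr"/"vm" - a vector register / vector mask register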
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'A':
      return C_Memory;
    case 'S': // A symbolic address
      return C_Other;
    }
  } else {
    if (Constraint == "vr" || Constraint == "vm")
      return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  StringRef Constraint,
                                                  MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // RISCV register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to XLen for P extension?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &RISCV::GPRRegClass);
    case 'f':
      if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
        return std::make_pair(0U, &RISCV::FPR64RegClass);
      break;
    default:
      break;
    }
  } else if (Constraint == "vr") {
    for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
                           &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
      if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
        return std::make_pair(0U, RC);
    }
  } else if (Constraint == "vm") {
    if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
      return std::make_pair(0U, &RISCV::VMV0RegClass);
  }

  // Clang will correctly decode the usage of register name aliases into their
  // official names. However, other frontends like `rustc` do not. This allows
  // users of these frontends to use the ABI names for registers in LLVM-style
  // register constraints.
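  // For example, a constraint of "{a0}" resolves to RISCV::X10 here, whereas
  // the TableGen record name "{x10}" is expected to be handled by the generic
  // TargetLowering::getRegForInlineAsmConstraint lookup further down.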
  unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
                               .Case("{zero}", RISCV::X0)
                               .Case("{ra}", RISCV::X1)
                               .Case("{sp}", RISCV::X2)
                               .Case("{gp}", RISCV::X3)
                               .Case("{tp}", RISCV::X4)
                               .Case("{t0}", RISCV::X5)
                               .Case("{t1}", RISCV::X6)
                               .Case("{t2}", RISCV::X7)
                               .Cases("{s0}", "{fp}", RISCV::X8)
                               .Case("{s1}", RISCV::X9)
                               .Case("{a0}", RISCV::X10)
                               .Case("{a1}", RISCV::X11)
                               .Case("{a2}", RISCV::X12)
                               .Case("{a3}", RISCV::X13)
                               .Case("{a4}", RISCV::X14)
                               .Case("{a5}", RISCV::X15)
                               .Case("{a6}", RISCV::X16)
                               .Case("{a7}", RISCV::X17)
                               .Case("{s2}", RISCV::X18)
                               .Case("{s3}", RISCV::X19)
                               .Case("{s4}", RISCV::X20)
                               .Case("{s5}", RISCV::X21)
                               .Case("{s6}", RISCV::X22)
                               .Case("{s7}", RISCV::X23)
                               .Case("{s8}", RISCV::X24)
                               .Case("{s9}", RISCV::X25)
                               .Case("{s10}", RISCV::X26)
                               .Case("{s11}", RISCV::X27)
                               .Case("{t3}", RISCV::X28)
                               .Case("{t4}", RISCV::X29)
                               .Case("{t5}", RISCV::X30)
                               .Case("{t6}", RISCV::X31)
                               .Default(RISCV::NoRegister);
  if (XRegFromAlias != RISCV::NoRegister)
    return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);

  // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
  // TableGen record rather than the AsmName to choose registers for InlineAsm
  // constraints, plus we want to match those names to the widest floating point
  // register type available, manually select floating point registers here.
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      if (VT == MVT::f32 || VT == MVT::Other)
        return std::make_pair(FReg, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned HReg = RISCV::F0_H + RegNo;
        return std::make_pair(HReg, &RISCV::FPR16RegClass);
      }
    }
  }

  if (Subtarget.hasVInstructions()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  std::pair<Register, const TargetRegisterClass *> Res =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // If we picked one of the Zfinx register classes, remap it to the GPR class.
  // FIXME: When Zfinx is supported in CodeGen this will need to take the
  // Subtarget into account.
  if (Res.second == &RISCV::GPRF16RegClass ||
      Res.second == &RISCV::GPRF32RegClass ||
      Res.second == &RISCV::GPRF64RegClass)
    return std::make_pair(Res.first, &RISCV::GPRRegClass);

  return Res;
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'S':
      if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0)));
      } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
        Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
                                                BA->getValueType(0)));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
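  // A sketch of the arithmetic: for an i8 atomicrmw min on RV32 whose element
  // sits at byte offset 1 of its aligned word, ShiftAmt would be 8 and
  // ValWidth 8, so the extra operand is XLen - ValWidth - ShiftAmt =
  // 32 - 8 - 8 = 16.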
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  // Don't expand forced atomics, we want to have __sync libcalls instead.
  if (Subtarget.hasForcedAtomics())
    return AtomicExpansionKind::None;

  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
                                                        EVT DataVT) const {
  return false;
}

bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
                                               EVT VT) const {
  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
    return false;

  switch (FPVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfhOrZfhmin();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    return false;
  }
}

unsigned RISCVTargetLowering::getJumpTableEncoding() const {
  // If we are using the small code model, we can reduce size of jump table
  // entry to 4 bytes.
  if (Subtarget.is64Bit() && !isPositionIndependent() &&
      getTargetMachine().getCodeModel() == CodeModel::Small) {
    return MachineJumpTableInfo::EK_Custom32;
  }
  return TargetLowering::getJumpTableEncoding();
}

const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
    unsigned uid, MCContext &Ctx) const {
  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
         getTargetMachine().getCodeModel() == CodeModel::Small);
  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}

bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
  // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
  // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
  // a power of two as well.
  // FIXME: This doesn't work for zve32, but that's already broken
  // elsewhere for the same reason.
  assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
  static_assert(RISCV::RVVBitsPerBlock == 64,
                "RVVBitsPerBlock changed, audit needed");
  return true;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  EVT SVT = VT.getScalarType();

  if (!SVT.isSimple())
    return false;

  switch (SVT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return VT.isVector() ? Subtarget.hasVInstructionsF16()
                         : Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is f32 type for LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  const bool HasExtMOrZmmul =
      Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
  if (VT.isScalarInteger()) {
    // Omit the optimization if the sub target has the M extension and the data
    // size exceeds XLen.
    if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
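      // E.g. with Zba, x * 4098 (4098 = 2 + 4096, so Imm - 2 is a power of 2
      // and 4098 is not simm12) can become sh1add(x, slli(x, 12)) instead of
      // materializing the constant for a mul.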
      if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
          ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
           (Imm - 8).isPowerOf2()))
        return true;
      // Omit the following optimization if the sub target has the M extension
      // and the data size >= XLen.
      if (HasExtMOrZmmul && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
                                                      SDValue ConstNode) const {
  // Let the DAGCombiner decide for vectors.
  EVT VT = AddNode.getValueType();
  if (VT.isVector())
    return true;

  // Let the DAGCombiner decide for larger types.
  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
    return true;

  // It is worse if c1 is simm12 while c1*c2 is not.
  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
  const APInt &C1 = C1Node->getAPIntValue();
  const APInt &C2 = C2Node->getAPIntValue();
  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
    return false;

  // Default to true and let the DAGCombiner decide.
  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (!VT.isVector()) {
    if (Fast)
      *Fast = 0;
    return Subtarget.enableUnalignedScalarMem();
  }

  // All vector implementations must support element alignment
  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = 1;
    return true;
  }

  return false;
}

bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
    // and cast to f32.
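    // So, for example, the f16 value 1.0 (bit pattern 0x3C00) is passed in an
    // FPR32 as the NaN-boxed bit pattern 0xFFFF3C00.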
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type of
      // PartVT first.
      // For example, to copy a <vscale x 1 x i8> value to <vscale x 4 x i16>,
      // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
      // INSERT_SUBVECTOR and can then bitcast to <vscale x 4 x i16>.
      if (ValueEltVT != PartEltVT) {
        if (PartVTBitSize > ValueVTBitSize) {
          unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of element should not be zero.");
          EVT SameEltTypeVT =
              EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
          Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
                            DAG.getUNDEF(SameEltTypeVT), Val,
                            DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}

SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert it to the same element type
      // of PartVT.
      // For example, to copy a <vscale x 1 x i8> value from <vscale x 4 x i16>,
      // we first need to convert <vscale x 4 x i16> to <vscale x 8 x i8>, and
      // can then extract the <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of element should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(unsigned Opc) const {
  // Scalarizing zero_ext and sign_ext might stop them from being matched as
  // widening instructions in some situations.
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable