1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCV.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "RISCVRegisterInfo.h" 19 #include "RISCVSubtarget.h" 20 #include "RISCVTargetMachine.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/MemoryLocation.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29 #include "llvm/CodeGen/ValueTypes.h" 30 #include "llvm/IR/DiagnosticInfo.h" 31 #include "llvm/IR/DiagnosticPrinter.h" 32 #include "llvm/IR/IRBuilder.h" 33 #include "llvm/IR/IntrinsicsRISCV.h" 34 #include "llvm/IR/PatternMatch.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/KnownBits.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/raw_ostream.h" 40 41 using namespace llvm; 42 43 #define DEBUG_TYPE "riscv-lower" 44 45 STATISTIC(NumTailCalls, "Number of tail calls"); 46 47 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 48 const RISCVSubtarget &STI) 49 : TargetLowering(TM), Subtarget(STI) { 50 51 if (Subtarget.isRV32E()) 52 report_fatal_error("Codegen not yet implemented for RV32E"); 53 54 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 55 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 56 57 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 58 !Subtarget.hasStdExtF()) { 59 errs() << "Hard-float 'f' ABI can't be used for a target that " 60 "doesn't support the F instruction set extension (ignoring " 61 "target-abi)\n"; 62 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 63 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 64 !Subtarget.hasStdExtD()) { 65 errs() << "Hard-float 'd' ABI can't be used for a target that " 66 "doesn't support the D instruction set extension (ignoring " 67 "target-abi)\n"; 68 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 69 } 70 71 switch (ABI) { 72 default: 73 report_fatal_error("Don't know how to lower this ABI"); 74 case RISCVABI::ABI_ILP32: 75 case RISCVABI::ABI_ILP32F: 76 case RISCVABI::ABI_ILP32D: 77 case RISCVABI::ABI_LP64: 78 case RISCVABI::ABI_LP64F: 79 case RISCVABI::ABI_LP64D: 80 break; 81 } 82 83 MVT XLenVT = Subtarget.getXLenVT(); 84 85 // Set up the register classes. 86 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 87 88 if (Subtarget.hasStdExtZfh()) 89 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); 90 if (Subtarget.hasStdExtF()) 91 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 92 if (Subtarget.hasStdExtD()) 93 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 94 95 static const MVT::SimpleValueType BoolVecVTs[] = { 96 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, 97 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1}; 98 static const MVT::SimpleValueType IntVecVTs[] = { 99 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8, 100 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16, 101 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32, 102 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64, 103 MVT::nxv4i64, MVT::nxv8i64}; 104 static const MVT::SimpleValueType F16VecVTs[] = { 105 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16, 106 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16}; 107 static const MVT::SimpleValueType F32VecVTs[] = { 108 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32}; 109 static const MVT::SimpleValueType F64VecVTs[] = { 110 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64}; 111 112 if (Subtarget.hasVInstructions()) { 113 auto addRegClassForRVV = [this](MVT VT) { 114 unsigned Size = VT.getSizeInBits().getKnownMinValue(); 115 assert(Size <= 512 && isPowerOf2_32(Size)); 116 const TargetRegisterClass *RC; 117 if (Size <= 64) 118 RC = &RISCV::VRRegClass; 119 else if (Size == 128) 120 RC = &RISCV::VRM2RegClass; 121 else if (Size == 256) 122 RC = &RISCV::VRM4RegClass; 123 else 124 RC = &RISCV::VRM8RegClass; 125 126 addRegisterClass(VT, RC); 127 }; 128 129 for (MVT VT : BoolVecVTs) 130 addRegClassForRVV(VT); 131 for (MVT VT : IntVecVTs) { 132 if (VT.getVectorElementType() == MVT::i64 && 133 !Subtarget.hasVInstructionsI64()) 134 continue; 135 addRegClassForRVV(VT); 136 } 137 138 if (Subtarget.hasVInstructionsF16()) 139 for (MVT VT : F16VecVTs) 140 addRegClassForRVV(VT); 141 142 if (Subtarget.hasVInstructionsF32()) 143 for (MVT VT : F32VecVTs) 144 addRegClassForRVV(VT); 145 146 if (Subtarget.hasVInstructionsF64()) 147 for (MVT VT : F64VecVTs) 148 addRegClassForRVV(VT); 149 150 if (Subtarget.useRVVForFixedLengthVectors()) { 151 auto addRegClassForFixedVectors = [this](MVT VT) { 152 MVT ContainerVT = getContainerForFixedLengthVector(VT); 153 unsigned RCID = getRegClassIDForVecVT(ContainerVT); 154 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 155 addRegisterClass(VT, TRI.getRegClass(RCID)); 156 }; 157 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) 158 if (useRVVForFixedLengthVectorVT(VT)) 159 addRegClassForFixedVectors(VT); 160 161 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) 162 if (useRVVForFixedLengthVectorVT(VT)) 163 addRegClassForFixedVectors(VT); 164 } 165 } 166 167 // Compute derived properties from the register classes. 168 computeRegisterProperties(STI.getRegisterInfo()); 169 170 setStackPointerRegisterToSaveRestore(RISCV::X2); 171 172 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 173 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 174 175 // TODO: add all necessary setOperationAction calls. 176 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 177 178 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 179 setOperationAction(ISD::BR_CC, XLenVT, Expand); 180 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 181 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 182 183 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 184 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 185 186 setOperationAction(ISD::VASTART, MVT::Other, Custom); 187 setOperationAction(ISD::VAARG, MVT::Other, Expand); 188 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 189 setOperationAction(ISD::VAEND, MVT::Other, Expand); 190 191 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 192 if (!Subtarget.hasStdExtZbb()) { 193 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 194 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 195 } 196 197 if (Subtarget.is64Bit()) { 198 setOperationAction(ISD::ADD, MVT::i32, Custom); 199 setOperationAction(ISD::SUB, MVT::i32, Custom); 200 setOperationAction(ISD::SHL, MVT::i32, Custom); 201 setOperationAction(ISD::SRA, MVT::i32, Custom); 202 setOperationAction(ISD::SRL, MVT::i32, Custom); 203 204 setOperationAction(ISD::UADDO, MVT::i32, Custom); 205 setOperationAction(ISD::USUBO, MVT::i32, Custom); 206 setOperationAction(ISD::UADDSAT, MVT::i32, Custom); 207 setOperationAction(ISD::USUBSAT, MVT::i32, Custom); 208 } else { 209 setLibcallName(RTLIB::SHL_I128, nullptr); 210 setLibcallName(RTLIB::SRL_I128, nullptr); 211 setLibcallName(RTLIB::SRA_I128, nullptr); 212 setLibcallName(RTLIB::MUL_I128, nullptr); 213 setLibcallName(RTLIB::MULO_I64, nullptr); 214 } 215 216 if (!Subtarget.hasStdExtM()) { 217 setOperationAction(ISD::MUL, XLenVT, Expand); 218 setOperationAction(ISD::MULHS, XLenVT, Expand); 219 setOperationAction(ISD::MULHU, XLenVT, Expand); 220 setOperationAction(ISD::SDIV, XLenVT, Expand); 221 setOperationAction(ISD::UDIV, XLenVT, Expand); 222 setOperationAction(ISD::SREM, XLenVT, Expand); 223 setOperationAction(ISD::UREM, XLenVT, Expand); 224 } else { 225 if (Subtarget.is64Bit()) { 226 setOperationAction(ISD::MUL, MVT::i32, Custom); 227 setOperationAction(ISD::MUL, MVT::i128, Custom); 228 229 setOperationAction(ISD::SDIV, MVT::i8, Custom); 230 setOperationAction(ISD::UDIV, MVT::i8, Custom); 231 setOperationAction(ISD::UREM, MVT::i8, Custom); 232 setOperationAction(ISD::SDIV, MVT::i16, Custom); 233 setOperationAction(ISD::UDIV, MVT::i16, Custom); 234 setOperationAction(ISD::UREM, MVT::i16, Custom); 235 setOperationAction(ISD::SDIV, MVT::i32, Custom); 236 setOperationAction(ISD::UDIV, MVT::i32, Custom); 237 setOperationAction(ISD::UREM, MVT::i32, Custom); 238 } else { 239 setOperationAction(ISD::MUL, MVT::i64, Custom); 240 } 241 } 242 243 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 244 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 245 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 246 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 247 248 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); 249 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); 250 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); 251 252 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { 253 if (Subtarget.is64Bit()) { 254 setOperationAction(ISD::ROTL, MVT::i32, Custom); 255 setOperationAction(ISD::ROTR, MVT::i32, Custom); 256 } 257 } else { 258 setOperationAction(ISD::ROTL, XLenVT, Expand); 259 setOperationAction(ISD::ROTR, XLenVT, Expand); 260 } 261 262 if (Subtarget.hasStdExtZbp()) { 263 // Custom lower bswap/bitreverse so we can convert them to GREVI to enable 264 // more combining. 265 setOperationAction(ISD::BITREVERSE, XLenVT, Custom); 266 setOperationAction(ISD::BSWAP, XLenVT, Custom); 267 setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); 268 // BSWAP i8 doesn't exist. 269 setOperationAction(ISD::BITREVERSE, MVT::i16, Custom); 270 setOperationAction(ISD::BSWAP, MVT::i16, Custom); 271 272 if (Subtarget.is64Bit()) { 273 setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); 274 setOperationAction(ISD::BSWAP, MVT::i32, Custom); 275 } 276 } else { 277 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll 278 // pattern match it directly in isel. 279 setOperationAction(ISD::BSWAP, XLenVT, 280 Subtarget.hasStdExtZbb() ? Legal : Expand); 281 } 282 283 if (Subtarget.hasStdExtZbb()) { 284 setOperationAction(ISD::SMIN, XLenVT, Legal); 285 setOperationAction(ISD::SMAX, XLenVT, Legal); 286 setOperationAction(ISD::UMIN, XLenVT, Legal); 287 setOperationAction(ISD::UMAX, XLenVT, Legal); 288 289 if (Subtarget.is64Bit()) { 290 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 291 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); 292 setOperationAction(ISD::CTLZ, MVT::i32, Custom); 293 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); 294 } 295 } else { 296 setOperationAction(ISD::CTTZ, XLenVT, Expand); 297 setOperationAction(ISD::CTLZ, XLenVT, Expand); 298 setOperationAction(ISD::CTPOP, XLenVT, Expand); 299 } 300 301 if (Subtarget.hasStdExtZbt()) { 302 setOperationAction(ISD::FSHL, XLenVT, Custom); 303 setOperationAction(ISD::FSHR, XLenVT, Custom); 304 setOperationAction(ISD::SELECT, XLenVT, Legal); 305 306 if (Subtarget.is64Bit()) { 307 setOperationAction(ISD::FSHL, MVT::i32, Custom); 308 setOperationAction(ISD::FSHR, MVT::i32, Custom); 309 } 310 } else { 311 setOperationAction(ISD::SELECT, XLenVT, Custom); 312 } 313 314 static const ISD::CondCode FPCCToExpand[] = { 315 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 316 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 317 ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; 318 319 static const ISD::NodeType FPOpToExpand[] = { 320 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, 321 ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16}; 322 323 if (Subtarget.hasStdExtZfh()) 324 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 325 326 if (Subtarget.hasStdExtZfh()) { 327 setOperationAction(ISD::FMINNUM, MVT::f16, Legal); 328 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); 329 setOperationAction(ISD::LRINT, MVT::f16, Legal); 330 setOperationAction(ISD::LLRINT, MVT::f16, Legal); 331 setOperationAction(ISD::LROUND, MVT::f16, Legal); 332 setOperationAction(ISD::LLROUND, MVT::f16, Legal); 333 for (auto CC : FPCCToExpand) 334 setCondCodeAction(CC, MVT::f16, Expand); 335 setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); 336 setOperationAction(ISD::SELECT, MVT::f16, Custom); 337 setOperationAction(ISD::BR_CC, MVT::f16, Expand); 338 339 setOperationAction(ISD::FREM, MVT::f16, Promote); 340 setOperationAction(ISD::FCEIL, MVT::f16, Promote); 341 setOperationAction(ISD::FFLOOR, MVT::f16, Promote); 342 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); 343 setOperationAction(ISD::FRINT, MVT::f16, Promote); 344 setOperationAction(ISD::FROUND, MVT::f16, Promote); 345 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); 346 setOperationAction(ISD::FTRUNC, MVT::f16, Promote); 347 setOperationAction(ISD::FPOW, MVT::f16, Promote); 348 setOperationAction(ISD::FPOWI, MVT::f16, Promote); 349 setOperationAction(ISD::FCOS, MVT::f16, Promote); 350 setOperationAction(ISD::FSIN, MVT::f16, Promote); 351 setOperationAction(ISD::FSINCOS, MVT::f16, Promote); 352 setOperationAction(ISD::FEXP, MVT::f16, Promote); 353 setOperationAction(ISD::FEXP2, MVT::f16, Promote); 354 setOperationAction(ISD::FLOG, MVT::f16, Promote); 355 setOperationAction(ISD::FLOG2, MVT::f16, Promote); 356 setOperationAction(ISD::FLOG10, MVT::f16, Promote); 357 358 // We need to custom promote this. 359 if (Subtarget.is64Bit()) 360 setOperationAction(ISD::FPOWI, MVT::i32, Custom); 361 } 362 363 if (Subtarget.hasStdExtF()) { 364 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 365 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 366 setOperationAction(ISD::LRINT, MVT::f32, Legal); 367 setOperationAction(ISD::LLRINT, MVT::f32, Legal); 368 setOperationAction(ISD::LROUND, MVT::f32, Legal); 369 setOperationAction(ISD::LLROUND, MVT::f32, Legal); 370 for (auto CC : FPCCToExpand) 371 setCondCodeAction(CC, MVT::f32, Expand); 372 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 373 setOperationAction(ISD::SELECT, MVT::f32, Custom); 374 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 375 for (auto Op : FPOpToExpand) 376 setOperationAction(Op, MVT::f32, Expand); 377 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 378 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 379 } 380 381 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 382 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 383 384 if (Subtarget.hasStdExtD()) { 385 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 386 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 387 setOperationAction(ISD::LRINT, MVT::f64, Legal); 388 setOperationAction(ISD::LLRINT, MVT::f64, Legal); 389 setOperationAction(ISD::LROUND, MVT::f64, Legal); 390 setOperationAction(ISD::LLROUND, MVT::f64, Legal); 391 for (auto CC : FPCCToExpand) 392 setCondCodeAction(CC, MVT::f64, Expand); 393 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 394 setOperationAction(ISD::SELECT, MVT::f64, Custom); 395 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 396 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 397 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 398 for (auto Op : FPOpToExpand) 399 setOperationAction(Op, MVT::f64, Expand); 400 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 401 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 402 } 403 404 if (Subtarget.is64Bit()) { 405 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 406 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 407 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); 408 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); 409 } 410 411 if (Subtarget.hasStdExtF()) { 412 setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom); 413 setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom); 414 415 setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom); 416 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); 417 } 418 419 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 420 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 421 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 422 setOperationAction(ISD::JumpTable, XLenVT, Custom); 423 424 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 425 426 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 427 // Unfortunately this can't be determined just from the ISA naming string. 428 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 429 Subtarget.is64Bit() ? Legal : Custom); 430 431 setOperationAction(ISD::TRAP, MVT::Other, Legal); 432 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 433 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 434 if (Subtarget.is64Bit()) 435 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); 436 437 if (Subtarget.hasStdExtA()) { 438 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 439 setMinCmpXchgSizeInBits(32); 440 } else { 441 setMaxAtomicSizeInBitsSupported(0); 442 } 443 444 setBooleanContents(ZeroOrOneBooleanContent); 445 446 if (Subtarget.hasVInstructions()) { 447 setBooleanVectorContents(ZeroOrOneBooleanContent); 448 449 setOperationAction(ISD::VSCALE, XLenVT, Custom); 450 451 // RVV intrinsics may have illegal operands. 452 // We also need to custom legalize vmv.x.s. 453 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); 454 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); 455 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); 456 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); 457 if (Subtarget.is64Bit()) { 458 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); 459 } else { 460 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 461 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 462 } 463 464 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 465 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 466 467 static const unsigned IntegerVPOps[] = { 468 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, 469 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM, 470 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, 471 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, 472 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, 473 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, 474 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}; 475 476 static const unsigned FloatingPointVPOps[] = { 477 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, 478 ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, 479 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX}; 480 481 if (!Subtarget.is64Bit()) { 482 // We must custom-lower certain vXi64 operations on RV32 due to the vector 483 // element type being illegal. 484 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom); 485 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom); 486 487 setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom); 488 setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom); 489 setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom); 490 setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom); 491 setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom); 492 setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom); 493 setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom); 494 setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom); 495 496 setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom); 497 setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom); 498 setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom); 499 setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom); 500 setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom); 501 setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom); 502 setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom); 503 setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom); 504 } 505 506 for (MVT VT : BoolVecVTs) { 507 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 508 509 // Mask VTs are custom-expanded into a series of standard nodes 510 setOperationAction(ISD::TRUNCATE, VT, Custom); 511 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 512 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); 513 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 514 515 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 516 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 517 518 setOperationAction(ISD::SELECT, VT, Custom); 519 setOperationAction(ISD::SELECT_CC, VT, Expand); 520 setOperationAction(ISD::VSELECT, VT, Expand); 521 522 setOperationAction(ISD::VECREDUCE_AND, VT, Custom); 523 setOperationAction(ISD::VECREDUCE_OR, VT, Custom); 524 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); 525 526 setOperationAction(ISD::VP_REDUCE_AND, VT, Custom); 527 setOperationAction(ISD::VP_REDUCE_OR, VT, Custom); 528 setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom); 529 530 // RVV has native int->float & float->int conversions where the 531 // element type sizes are within one power-of-two of each other. Any 532 // wider distances between type sizes have to be lowered as sequences 533 // which progressively narrow the gap in stages. 534 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 535 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 536 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 537 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 538 539 // Expand all extending loads to types larger than this, and truncating 540 // stores from types larger than this. 541 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 542 setTruncStoreAction(OtherVT, VT, Expand); 543 setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); 544 setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); 545 setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); 546 } 547 } 548 549 for (MVT VT : IntVecVTs) { 550 if (VT.getVectorElementType() == MVT::i64 && 551 !Subtarget.hasVInstructionsI64()) 552 continue; 553 554 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 555 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 556 557 // Vectors implement MULHS/MULHU. 558 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 559 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 560 561 setOperationAction(ISD::SMIN, VT, Legal); 562 setOperationAction(ISD::SMAX, VT, Legal); 563 setOperationAction(ISD::UMIN, VT, Legal); 564 setOperationAction(ISD::UMAX, VT, Legal); 565 566 setOperationAction(ISD::ROTL, VT, Expand); 567 setOperationAction(ISD::ROTR, VT, Expand); 568 569 setOperationAction(ISD::CTTZ, VT, Expand); 570 setOperationAction(ISD::CTLZ, VT, Expand); 571 setOperationAction(ISD::CTPOP, VT, Expand); 572 573 setOperationAction(ISD::BSWAP, VT, Expand); 574 575 // Custom-lower extensions and truncations from/to mask types. 576 setOperationAction(ISD::ANY_EXTEND, VT, Custom); 577 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); 578 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); 579 580 // RVV has native int->float & float->int conversions where the 581 // element type sizes are within one power-of-two of each other. Any 582 // wider distances between type sizes have to be lowered as sequences 583 // which progressively narrow the gap in stages. 584 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 585 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 586 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 587 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 588 589 setOperationAction(ISD::SADDSAT, VT, Legal); 590 setOperationAction(ISD::UADDSAT, VT, Legal); 591 setOperationAction(ISD::SSUBSAT, VT, Legal); 592 setOperationAction(ISD::USUBSAT, VT, Legal); 593 594 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" 595 // nodes which truncate by one power of two at a time. 596 setOperationAction(ISD::TRUNCATE, VT, Custom); 597 598 // Custom-lower insert/extract operations to simplify patterns. 599 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 600 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 601 602 // Custom-lower reduction operations to set up the corresponding custom 603 // nodes' operands. 604 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 605 setOperationAction(ISD::VECREDUCE_AND, VT, Custom); 606 setOperationAction(ISD::VECREDUCE_OR, VT, Custom); 607 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); 608 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); 609 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); 610 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); 611 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); 612 613 for (unsigned VPOpc : IntegerVPOps) 614 setOperationAction(VPOpc, VT, Custom); 615 616 setOperationAction(ISD::LOAD, VT, Custom); 617 setOperationAction(ISD::STORE, VT, Custom); 618 619 setOperationAction(ISD::MLOAD, VT, Custom); 620 setOperationAction(ISD::MSTORE, VT, Custom); 621 setOperationAction(ISD::MGATHER, VT, Custom); 622 setOperationAction(ISD::MSCATTER, VT, Custom); 623 624 setOperationAction(ISD::VP_LOAD, VT, Custom); 625 setOperationAction(ISD::VP_STORE, VT, Custom); 626 setOperationAction(ISD::VP_GATHER, VT, Custom); 627 setOperationAction(ISD::VP_SCATTER, VT, Custom); 628 629 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 630 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); 631 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 632 633 setOperationAction(ISD::SELECT, VT, Custom); 634 setOperationAction(ISD::SELECT_CC, VT, Expand); 635 636 setOperationAction(ISD::STEP_VECTOR, VT, Custom); 637 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); 638 639 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) { 640 setTruncStoreAction(VT, OtherVT, Expand); 641 setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); 642 setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); 643 setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); 644 } 645 646 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point 647 // type that can represent the value exactly. 648 if (VT.getVectorElementType() != MVT::i64) { 649 MVT FloatEltVT = 650 VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32; 651 EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 652 if (isTypeLegal(FloatVT)) { 653 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); 654 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); 655 } 656 } 657 } 658 659 // Expand various CCs to best match the RVV ISA, which natively supports UNE 660 // but no other unordered comparisons, and supports all ordered comparisons 661 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization 662 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE), 663 // and we pattern-match those back to the "original", swapping operands once 664 // more. This way we catch both operations and both "vf" and "fv" forms with 665 // fewer patterns. 666 static const ISD::CondCode VFPCCToExpand[] = { 667 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 668 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO, 669 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE, 670 }; 671 672 // Sets common operation actions on RVV floating-point vector types. 673 const auto SetCommonVFPActions = [&](MVT VT) { 674 setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); 675 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type 676 // sizes are within one power-of-two of each other. Therefore conversions 677 // between vXf16 and vXf64 must be lowered as sequences which convert via 678 // vXf32. 679 setOperationAction(ISD::FP_ROUND, VT, Custom); 680 setOperationAction(ISD::FP_EXTEND, VT, Custom); 681 // Custom-lower insert/extract operations to simplify patterns. 682 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 683 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 684 // Expand various condition codes (explained above). 685 for (auto CC : VFPCCToExpand) 686 setCondCodeAction(CC, VT, Expand); 687 688 setOperationAction(ISD::FMINNUM, VT, Legal); 689 setOperationAction(ISD::FMAXNUM, VT, Legal); 690 691 setOperationAction(ISD::FTRUNC, VT, Custom); 692 setOperationAction(ISD::FCEIL, VT, Custom); 693 setOperationAction(ISD::FFLOOR, VT, Custom); 694 695 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); 696 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); 697 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); 698 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); 699 700 setOperationAction(ISD::FCOPYSIGN, VT, Legal); 701 702 setOperationAction(ISD::LOAD, VT, Custom); 703 setOperationAction(ISD::STORE, VT, Custom); 704 705 setOperationAction(ISD::MLOAD, VT, Custom); 706 setOperationAction(ISD::MSTORE, VT, Custom); 707 setOperationAction(ISD::MGATHER, VT, Custom); 708 setOperationAction(ISD::MSCATTER, VT, Custom); 709 710 setOperationAction(ISD::VP_LOAD, VT, Custom); 711 setOperationAction(ISD::VP_STORE, VT, Custom); 712 setOperationAction(ISD::VP_GATHER, VT, Custom); 713 setOperationAction(ISD::VP_SCATTER, VT, Custom); 714 715 setOperationAction(ISD::SELECT, VT, Custom); 716 setOperationAction(ISD::SELECT_CC, VT, Expand); 717 718 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 719 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); 720 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 721 722 setOperationAction(ISD::VECTOR_REVERSE, VT, Custom); 723 724 for (unsigned VPOpc : FloatingPointVPOps) 725 setOperationAction(VPOpc, VT, Custom); 726 }; 727 728 // Sets common extload/truncstore actions on RVV floating-point vector 729 // types. 730 const auto SetCommonVFPExtLoadTruncStoreActions = 731 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) { 732 for (auto SmallVT : SmallerVTs) { 733 setTruncStoreAction(VT, SmallVT, Expand); 734 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand); 735 } 736 }; 737 738 if (Subtarget.hasVInstructionsF16()) 739 for (MVT VT : F16VecVTs) 740 SetCommonVFPActions(VT); 741 742 for (MVT VT : F32VecVTs) { 743 if (Subtarget.hasVInstructionsF32()) 744 SetCommonVFPActions(VT); 745 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 746 } 747 748 for (MVT VT : F64VecVTs) { 749 if (Subtarget.hasVInstructionsF64()) 750 SetCommonVFPActions(VT); 751 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs); 752 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs); 753 } 754 755 if (Subtarget.useRVVForFixedLengthVectors()) { 756 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 757 if (!useRVVForFixedLengthVectorVT(VT)) 758 continue; 759 760 // By default everything must be expanded. 761 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 762 setOperationAction(Op, VT, Expand); 763 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) { 764 setTruncStoreAction(VT, OtherVT, Expand); 765 setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); 766 setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand); 767 setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand); 768 } 769 770 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 771 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); 772 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 773 774 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 775 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 776 777 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 778 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 779 780 setOperationAction(ISD::LOAD, VT, Custom); 781 setOperationAction(ISD::STORE, VT, Custom); 782 783 setOperationAction(ISD::SETCC, VT, Custom); 784 785 setOperationAction(ISD::SELECT, VT, Custom); 786 787 setOperationAction(ISD::TRUNCATE, VT, Custom); 788 789 setOperationAction(ISD::BITCAST, VT, Custom); 790 791 setOperationAction(ISD::VECREDUCE_AND, VT, Custom); 792 setOperationAction(ISD::VECREDUCE_OR, VT, Custom); 793 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); 794 795 setOperationAction(ISD::VP_REDUCE_AND, VT, Custom); 796 setOperationAction(ISD::VP_REDUCE_OR, VT, Custom); 797 setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom); 798 799 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 800 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 801 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 802 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 803 804 // Operations below are different for between masks and other vectors. 805 if (VT.getVectorElementType() == MVT::i1) { 806 setOperationAction(ISD::AND, VT, Custom); 807 setOperationAction(ISD::OR, VT, Custom); 808 setOperationAction(ISD::XOR, VT, Custom); 809 continue; 810 } 811 812 // Use SPLAT_VECTOR to prevent type legalization from destroying the 813 // splats when type legalizing i64 scalar on RV32. 814 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs 815 // improvements first. 816 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) { 817 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); 818 setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); 819 } 820 821 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 822 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 823 824 setOperationAction(ISD::MLOAD, VT, Custom); 825 setOperationAction(ISD::MSTORE, VT, Custom); 826 setOperationAction(ISD::MGATHER, VT, Custom); 827 setOperationAction(ISD::MSCATTER, VT, Custom); 828 829 setOperationAction(ISD::VP_LOAD, VT, Custom); 830 setOperationAction(ISD::VP_STORE, VT, Custom); 831 setOperationAction(ISD::VP_GATHER, VT, Custom); 832 setOperationAction(ISD::VP_SCATTER, VT, Custom); 833 834 setOperationAction(ISD::ADD, VT, Custom); 835 setOperationAction(ISD::MUL, VT, Custom); 836 setOperationAction(ISD::SUB, VT, Custom); 837 setOperationAction(ISD::AND, VT, Custom); 838 setOperationAction(ISD::OR, VT, Custom); 839 setOperationAction(ISD::XOR, VT, Custom); 840 setOperationAction(ISD::SDIV, VT, Custom); 841 setOperationAction(ISD::SREM, VT, Custom); 842 setOperationAction(ISD::UDIV, VT, Custom); 843 setOperationAction(ISD::UREM, VT, Custom); 844 setOperationAction(ISD::SHL, VT, Custom); 845 setOperationAction(ISD::SRA, VT, Custom); 846 setOperationAction(ISD::SRL, VT, Custom); 847 848 setOperationAction(ISD::SMIN, VT, Custom); 849 setOperationAction(ISD::SMAX, VT, Custom); 850 setOperationAction(ISD::UMIN, VT, Custom); 851 setOperationAction(ISD::UMAX, VT, Custom); 852 setOperationAction(ISD::ABS, VT, Custom); 853 854 setOperationAction(ISD::MULHS, VT, Custom); 855 setOperationAction(ISD::MULHU, VT, Custom); 856 857 setOperationAction(ISD::SADDSAT, VT, Custom); 858 setOperationAction(ISD::UADDSAT, VT, Custom); 859 setOperationAction(ISD::SSUBSAT, VT, Custom); 860 setOperationAction(ISD::USUBSAT, VT, Custom); 861 862 setOperationAction(ISD::VSELECT, VT, Custom); 863 setOperationAction(ISD::SELECT_CC, VT, Expand); 864 865 setOperationAction(ISD::ANY_EXTEND, VT, Custom); 866 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); 867 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); 868 869 // Custom-lower reduction operations to set up the corresponding custom 870 // nodes' operands. 871 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 872 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); 873 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); 874 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); 875 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); 876 877 for (unsigned VPOpc : IntegerVPOps) 878 setOperationAction(VPOpc, VT, Custom); 879 880 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point 881 // type that can represent the value exactly. 882 if (VT.getVectorElementType() != MVT::i64) { 883 MVT FloatEltVT = 884 VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32; 885 EVT FloatVT = 886 MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 887 if (isTypeLegal(FloatVT)) { 888 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); 889 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); 890 } 891 } 892 } 893 894 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { 895 if (!useRVVForFixedLengthVectorVT(VT)) 896 continue; 897 898 // By default everything must be expanded. 899 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) 900 setOperationAction(Op, VT, Expand); 901 for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) { 902 setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand); 903 setTruncStoreAction(VT, OtherVT, Expand); 904 } 905 906 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed. 907 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); 908 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 909 910 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 911 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 912 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 913 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 914 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 915 916 setOperationAction(ISD::LOAD, VT, Custom); 917 setOperationAction(ISD::STORE, VT, Custom); 918 setOperationAction(ISD::MLOAD, VT, Custom); 919 setOperationAction(ISD::MSTORE, VT, Custom); 920 setOperationAction(ISD::MGATHER, VT, Custom); 921 setOperationAction(ISD::MSCATTER, VT, Custom); 922 923 setOperationAction(ISD::VP_LOAD, VT, Custom); 924 setOperationAction(ISD::VP_STORE, VT, Custom); 925 setOperationAction(ISD::VP_GATHER, VT, Custom); 926 setOperationAction(ISD::VP_SCATTER, VT, Custom); 927 928 setOperationAction(ISD::FADD, VT, Custom); 929 setOperationAction(ISD::FSUB, VT, Custom); 930 setOperationAction(ISD::FMUL, VT, Custom); 931 setOperationAction(ISD::FDIV, VT, Custom); 932 setOperationAction(ISD::FNEG, VT, Custom); 933 setOperationAction(ISD::FABS, VT, Custom); 934 setOperationAction(ISD::FCOPYSIGN, VT, Custom); 935 setOperationAction(ISD::FSQRT, VT, Custom); 936 setOperationAction(ISD::FMA, VT, Custom); 937 setOperationAction(ISD::FMINNUM, VT, Custom); 938 setOperationAction(ISD::FMAXNUM, VT, Custom); 939 940 setOperationAction(ISD::FP_ROUND, VT, Custom); 941 setOperationAction(ISD::FP_EXTEND, VT, Custom); 942 943 setOperationAction(ISD::FTRUNC, VT, Custom); 944 setOperationAction(ISD::FCEIL, VT, Custom); 945 setOperationAction(ISD::FFLOOR, VT, Custom); 946 947 for (auto CC : VFPCCToExpand) 948 setCondCodeAction(CC, VT, Expand); 949 950 setOperationAction(ISD::VSELECT, VT, Custom); 951 setOperationAction(ISD::SELECT, VT, Custom); 952 setOperationAction(ISD::SELECT_CC, VT, Expand); 953 954 setOperationAction(ISD::BITCAST, VT, Custom); 955 956 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); 957 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); 958 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); 959 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); 960 961 for (unsigned VPOpc : FloatingPointVPOps) 962 setOperationAction(VPOpc, VT, Custom); 963 } 964 965 // Custom-legalize bitcasts from fixed-length vectors to scalar types. 966 setOperationAction(ISD::BITCAST, MVT::i8, Custom); 967 setOperationAction(ISD::BITCAST, MVT::i16, Custom); 968 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 969 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 970 setOperationAction(ISD::BITCAST, MVT::f16, Custom); 971 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 972 setOperationAction(ISD::BITCAST, MVT::f64, Custom); 973 } 974 } 975 976 // Function alignments. 977 const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4); 978 setMinFunctionAlignment(FunctionAlignment); 979 setPrefFunctionAlignment(FunctionAlignment); 980 981 setMinimumJumpTableEntries(5); 982 983 // Jumps are expensive, compared to logic 984 setJumpIsExpensive(); 985 986 setTargetDAGCombine(ISD::ADD); 987 setTargetDAGCombine(ISD::SUB); 988 setTargetDAGCombine(ISD::AND); 989 setTargetDAGCombine(ISD::OR); 990 setTargetDAGCombine(ISD::XOR); 991 setTargetDAGCombine(ISD::ANY_EXTEND); 992 setTargetDAGCombine(ISD::ZERO_EXTEND); 993 if (Subtarget.hasVInstructions()) { 994 setTargetDAGCombine(ISD::FCOPYSIGN); 995 setTargetDAGCombine(ISD::MGATHER); 996 setTargetDAGCombine(ISD::MSCATTER); 997 setTargetDAGCombine(ISD::VP_GATHER); 998 setTargetDAGCombine(ISD::VP_SCATTER); 999 setTargetDAGCombine(ISD::SRA); 1000 setTargetDAGCombine(ISD::SRL); 1001 setTargetDAGCombine(ISD::SHL); 1002 setTargetDAGCombine(ISD::STORE); 1003 } 1004 } 1005 1006 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, 1007 LLVMContext &Context, 1008 EVT VT) const { 1009 if (!VT.isVector()) 1010 return getPointerTy(DL); 1011 if (Subtarget.hasVInstructions() && 1012 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors())) 1013 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); 1014 return VT.changeVectorElementTypeToInteger(); 1015 } 1016 1017 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const { 1018 return Subtarget.getXLenVT(); 1019 } 1020 1021 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 1022 const CallInst &I, 1023 MachineFunction &MF, 1024 unsigned Intrinsic) const { 1025 auto &DL = I.getModule()->getDataLayout(); 1026 switch (Intrinsic) { 1027 default: 1028 return false; 1029 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 1030 case Intrinsic::riscv_masked_atomicrmw_add_i32: 1031 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 1032 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 1033 case Intrinsic::riscv_masked_atomicrmw_max_i32: 1034 case Intrinsic::riscv_masked_atomicrmw_min_i32: 1035 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 1036 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 1037 case Intrinsic::riscv_masked_cmpxchg_i32: { 1038 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 1039 Info.opc = ISD::INTRINSIC_W_CHAIN; 1040 Info.memVT = MVT::getVT(PtrTy->getElementType()); 1041 Info.ptrVal = I.getArgOperand(0); 1042 Info.offset = 0; 1043 Info.align = Align(4); 1044 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 1045 MachineMemOperand::MOVolatile; 1046 return true; 1047 } 1048 case Intrinsic::riscv_masked_strided_load: 1049 Info.opc = ISD::INTRINSIC_W_CHAIN; 1050 Info.ptrVal = I.getArgOperand(1); 1051 Info.memVT = getValueType(DL, I.getType()->getScalarType()); 1052 Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8); 1053 Info.size = MemoryLocation::UnknownSize; 1054 Info.flags |= MachineMemOperand::MOLoad; 1055 return true; 1056 case Intrinsic::riscv_masked_strided_store: 1057 Info.opc = ISD::INTRINSIC_VOID; 1058 Info.ptrVal = I.getArgOperand(1); 1059 Info.memVT = 1060 getValueType(DL, I.getArgOperand(0)->getType()->getScalarType()); 1061 Info.align = Align( 1062 DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) / 1063 8); 1064 Info.size = MemoryLocation::UnknownSize; 1065 Info.flags |= MachineMemOperand::MOStore; 1066 return true; 1067 } 1068 } 1069 1070 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1071 const AddrMode &AM, Type *Ty, 1072 unsigned AS, 1073 Instruction *I) const { 1074 // No global is ever allowed as a base. 1075 if (AM.BaseGV) 1076 return false; 1077 1078 // Require a 12-bit signed offset. 1079 if (!isInt<12>(AM.BaseOffs)) 1080 return false; 1081 1082 switch (AM.Scale) { 1083 case 0: // "r+i" or just "i", depending on HasBaseReg. 1084 break; 1085 case 1: 1086 if (!AM.HasBaseReg) // allow "r+i". 1087 break; 1088 return false; // disallow "r+r" or "r+r+i". 1089 default: 1090 return false; 1091 } 1092 1093 return true; 1094 } 1095 1096 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 1097 return isInt<12>(Imm); 1098 } 1099 1100 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 1101 return isInt<12>(Imm); 1102 } 1103 1104 // On RV32, 64-bit integers are split into their high and low parts and held 1105 // in two different registers, so the trunc is free since the low register can 1106 // just be used. 1107 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 1108 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 1109 return false; 1110 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 1111 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 1112 return (SrcBits == 64 && DestBits == 32); 1113 } 1114 1115 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 1116 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 1117 !SrcVT.isInteger() || !DstVT.isInteger()) 1118 return false; 1119 unsigned SrcBits = SrcVT.getSizeInBits(); 1120 unsigned DestBits = DstVT.getSizeInBits(); 1121 return (SrcBits == 64 && DestBits == 32); 1122 } 1123 1124 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 1125 // Zexts are free if they can be combined with a load. 1126 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 1127 EVT MemVT = LD->getMemoryVT(); 1128 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 1129 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 1130 (LD->getExtensionType() == ISD::NON_EXTLOAD || 1131 LD->getExtensionType() == ISD::ZEXTLOAD)) 1132 return true; 1133 } 1134 1135 return TargetLowering::isZExtFree(Val, VT2); 1136 } 1137 1138 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 1139 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 1140 } 1141 1142 bool RISCVTargetLowering::isCheapToSpeculateCttz() const { 1143 return Subtarget.hasStdExtZbb(); 1144 } 1145 1146 bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { 1147 return Subtarget.hasStdExtZbb(); 1148 } 1149 1150 bool RISCVTargetLowering::hasAndNot(SDValue Y) const { 1151 EVT VT = Y.getValueType(); 1152 1153 // FIXME: Support vectors once we have tests. 1154 if (VT.isVector()) 1155 return false; 1156 1157 return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y); 1158 } 1159 1160 /// Check if sinking \p I's operands to I's basic block is profitable, because 1161 /// the operands can be folded into a target instruction, e.g. 1162 /// splats of scalars can fold into vector instructions. 1163 bool RISCVTargetLowering::shouldSinkOperands( 1164 Instruction *I, SmallVectorImpl<Use *> &Ops) const { 1165 using namespace llvm::PatternMatch; 1166 1167 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions()) 1168 return false; 1169 1170 auto IsSinker = [&](Instruction *I, int Operand) { 1171 switch (I->getOpcode()) { 1172 case Instruction::Add: 1173 case Instruction::Sub: 1174 case Instruction::Mul: 1175 case Instruction::And: 1176 case Instruction::Or: 1177 case Instruction::Xor: 1178 case Instruction::FAdd: 1179 case Instruction::FSub: 1180 case Instruction::FMul: 1181 case Instruction::FDiv: 1182 case Instruction::ICmp: 1183 case Instruction::FCmp: 1184 return true; 1185 case Instruction::Shl: 1186 case Instruction::LShr: 1187 case Instruction::AShr: 1188 case Instruction::UDiv: 1189 case Instruction::SDiv: 1190 case Instruction::URem: 1191 case Instruction::SRem: 1192 return Operand == 1; 1193 case Instruction::Call: 1194 if (auto *II = dyn_cast<IntrinsicInst>(I)) { 1195 switch (II->getIntrinsicID()) { 1196 case Intrinsic::fma: 1197 return Operand == 0 || Operand == 1; 1198 default: 1199 return false; 1200 } 1201 } 1202 return false; 1203 default: 1204 return false; 1205 } 1206 }; 1207 1208 for (auto OpIdx : enumerate(I->operands())) { 1209 if (!IsSinker(I, OpIdx.index())) 1210 continue; 1211 1212 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); 1213 // Make sure we are not already sinking this operand 1214 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) 1215 continue; 1216 1217 // We are looking for a splat that can be sunk. 1218 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), 1219 m_Undef(), m_ZeroMask()))) 1220 continue; 1221 1222 // All uses of the shuffle should be sunk to avoid duplicating it across gpr 1223 // and vector registers 1224 for (Use &U : Op->uses()) { 1225 Instruction *Insn = cast<Instruction>(U.getUser()); 1226 if (!IsSinker(Insn, U.getOperandNo())) 1227 return false; 1228 } 1229 1230 Ops.push_back(&Op->getOperandUse(0)); 1231 Ops.push_back(&OpIdx.value()); 1232 } 1233 return true; 1234 } 1235 1236 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 1237 bool ForCodeSize) const { 1238 if (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin()) 1239 return false; 1240 if (VT == MVT::f32 && !Subtarget.hasStdExtF()) 1241 return false; 1242 if (VT == MVT::f64 && !Subtarget.hasStdExtD()) 1243 return false; 1244 if (Imm.isNegZero()) 1245 return false; 1246 return Imm.isZero(); 1247 } 1248 1249 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 1250 return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || 1251 (VT == MVT::f32 && Subtarget.hasStdExtF()) || 1252 (VT == MVT::f64 && Subtarget.hasStdExtD()); 1253 } 1254 1255 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, 1256 CallingConv::ID CC, 1257 EVT VT) const { 1258 // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. 1259 // We might still end up using a GPR but that will be decided based on ABI. 1260 if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) 1261 return MVT::f32; 1262 1263 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); 1264 } 1265 1266 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, 1267 CallingConv::ID CC, 1268 EVT VT) const { 1269 // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. 1270 // We might still end up using a GPR but that will be decided based on ABI. 1271 if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) 1272 return 1; 1273 1274 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); 1275 } 1276 1277 // Changes the condition code and swaps operands if necessary, so the SetCC 1278 // operation matches one of the comparisons supported directly by branches 1279 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare 1280 // with 1/-1. 1281 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, 1282 ISD::CondCode &CC, SelectionDAG &DAG) { 1283 // Convert X > -1 to X >= 0. 1284 if (CC == ISD::SETGT && isAllOnesConstant(RHS)) { 1285 RHS = DAG.getConstant(0, DL, RHS.getValueType()); 1286 CC = ISD::SETGE; 1287 return; 1288 } 1289 // Convert X < 1 to 0 >= X. 1290 if (CC == ISD::SETLT && isOneConstant(RHS)) { 1291 RHS = LHS; 1292 LHS = DAG.getConstant(0, DL, RHS.getValueType()); 1293 CC = ISD::SETGE; 1294 return; 1295 } 1296 1297 switch (CC) { 1298 default: 1299 break; 1300 case ISD::SETGT: 1301 case ISD::SETLE: 1302 case ISD::SETUGT: 1303 case ISD::SETULE: 1304 CC = ISD::getSetCCSwappedOperands(CC); 1305 std::swap(LHS, RHS); 1306 break; 1307 } 1308 } 1309 1310 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) { 1311 assert(VT.isScalableVector() && "Expecting a scalable vector type"); 1312 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue(); 1313 if (VT.getVectorElementType() == MVT::i1) 1314 KnownSize *= 8; 1315 1316 switch (KnownSize) { 1317 default: 1318 llvm_unreachable("Invalid LMUL."); 1319 case 8: 1320 return RISCVII::VLMUL::LMUL_F8; 1321 case 16: 1322 return RISCVII::VLMUL::LMUL_F4; 1323 case 32: 1324 return RISCVII::VLMUL::LMUL_F2; 1325 case 64: 1326 return RISCVII::VLMUL::LMUL_1; 1327 case 128: 1328 return RISCVII::VLMUL::LMUL_2; 1329 case 256: 1330 return RISCVII::VLMUL::LMUL_4; 1331 case 512: 1332 return RISCVII::VLMUL::LMUL_8; 1333 } 1334 } 1335 1336 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) { 1337 switch (LMul) { 1338 default: 1339 llvm_unreachable("Invalid LMUL."); 1340 case RISCVII::VLMUL::LMUL_F8: 1341 case RISCVII::VLMUL::LMUL_F4: 1342 case RISCVII::VLMUL::LMUL_F2: 1343 case RISCVII::VLMUL::LMUL_1: 1344 return RISCV::VRRegClassID; 1345 case RISCVII::VLMUL::LMUL_2: 1346 return RISCV::VRM2RegClassID; 1347 case RISCVII::VLMUL::LMUL_4: 1348 return RISCV::VRM4RegClassID; 1349 case RISCVII::VLMUL::LMUL_8: 1350 return RISCV::VRM8RegClassID; 1351 } 1352 } 1353 1354 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) { 1355 RISCVII::VLMUL LMUL = getLMUL(VT); 1356 if (LMUL == RISCVII::VLMUL::LMUL_F8 || 1357 LMUL == RISCVII::VLMUL::LMUL_F4 || 1358 LMUL == RISCVII::VLMUL::LMUL_F2 || 1359 LMUL == RISCVII::VLMUL::LMUL_1) { 1360 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 1361 "Unexpected subreg numbering"); 1362 return RISCV::sub_vrm1_0 + Index; 1363 } 1364 if (LMUL == RISCVII::VLMUL::LMUL_2) { 1365 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 1366 "Unexpected subreg numbering"); 1367 return RISCV::sub_vrm2_0 + Index; 1368 } 1369 if (LMUL == RISCVII::VLMUL::LMUL_4) { 1370 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 1371 "Unexpected subreg numbering"); 1372 return RISCV::sub_vrm4_0 + Index; 1373 } 1374 llvm_unreachable("Invalid vector type."); 1375 } 1376 1377 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) { 1378 if (VT.getVectorElementType() == MVT::i1) 1379 return RISCV::VRRegClassID; 1380 return getRegClassIDForLMUL(getLMUL(VT)); 1381 } 1382 1383 // Attempt to decompose a subvector insert/extract between VecVT and 1384 // SubVecVT via subregister indices. Returns the subregister index that 1385 // can perform the subvector insert/extract with the given element index, as 1386 // well as the index corresponding to any leftover subvectors that must be 1387 // further inserted/extracted within the register class for SubVecVT. 1388 std::pair<unsigned, unsigned> 1389 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1390 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, 1391 const RISCVRegisterInfo *TRI) { 1392 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID && 1393 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID && 1394 RISCV::VRM2RegClassID > RISCV::VRRegClassID), 1395 "Register classes not ordered"); 1396 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT); 1397 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT); 1398 // Try to compose a subregister index that takes us from the incoming 1399 // LMUL>1 register class down to the outgoing one. At each step we half 1400 // the LMUL: 1401 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 1402 // Note that this is not guaranteed to find a subregister index, such as 1403 // when we are extracting from one VR type to another. 1404 unsigned SubRegIdx = RISCV::NoSubRegister; 1405 for (const unsigned RCID : 1406 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID}) 1407 if (VecRegClassID > RCID && SubRegClassID <= RCID) { 1408 VecVT = VecVT.getHalfNumVectorElementsVT(); 1409 bool IsHi = 1410 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue(); 1411 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx, 1412 getSubregIndexByMVT(VecVT, IsHi)); 1413 if (IsHi) 1414 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue(); 1415 } 1416 return {SubRegIdx, InsertExtractIdx}; 1417 } 1418 1419 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar 1420 // stores for those types. 1421 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const { 1422 return !Subtarget.useRVVForFixedLengthVectors() || 1423 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1); 1424 } 1425 1426 bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const { 1427 if (ScalarTy->isPointerTy()) 1428 return true; 1429 1430 if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) || 1431 ScalarTy->isIntegerTy(32)) 1432 return true; 1433 1434 if (ScalarTy->isIntegerTy(64)) 1435 return Subtarget.hasVInstructionsI64(); 1436 1437 if (ScalarTy->isHalfTy()) 1438 return Subtarget.hasVInstructionsF16(); 1439 if (ScalarTy->isFloatTy()) 1440 return Subtarget.hasVInstructionsF32(); 1441 if (ScalarTy->isDoubleTy()) 1442 return Subtarget.hasVInstructionsF64(); 1443 1444 return false; 1445 } 1446 1447 static bool useRVVForFixedLengthVectorVT(MVT VT, 1448 const RISCVSubtarget &Subtarget) { 1449 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!"); 1450 if (!Subtarget.useRVVForFixedLengthVectors()) 1451 return false; 1452 1453 // We only support a set of vector types with a consistent maximum fixed size 1454 // across all supported vector element types to avoid legalization issues. 1455 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest 1456 // fixed-length vector type we support is 1024 bytes. 1457 if (VT.getFixedSizeInBits() > 1024 * 8) 1458 return false; 1459 1460 unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits(); 1461 1462 MVT EltVT = VT.getVectorElementType(); 1463 1464 // Don't use RVV for vectors we cannot scalarize if required. 1465 switch (EltVT.SimpleTy) { 1466 // i1 is supported but has different rules. 1467 default: 1468 return false; 1469 case MVT::i1: 1470 // Masks can only use a single register. 1471 if (VT.getVectorNumElements() > MinVLen) 1472 return false; 1473 MinVLen /= 8; 1474 break; 1475 case MVT::i8: 1476 case MVT::i16: 1477 case MVT::i32: 1478 break; 1479 case MVT::i64: 1480 if (!Subtarget.hasVInstructionsI64()) 1481 return false; 1482 break; 1483 case MVT::f16: 1484 if (!Subtarget.hasVInstructionsF16()) 1485 return false; 1486 break; 1487 case MVT::f32: 1488 if (!Subtarget.hasVInstructionsF32()) 1489 return false; 1490 break; 1491 case MVT::f64: 1492 if (!Subtarget.hasVInstructionsF64()) 1493 return false; 1494 break; 1495 } 1496 1497 // Reject elements larger than ELEN. 1498 if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors()) 1499 return false; 1500 1501 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen); 1502 // Don't use RVV for types that don't fit. 1503 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors()) 1504 return false; 1505 1506 // TODO: Perhaps an artificial restriction, but worth having whilst getting 1507 // the base fixed length RVV support in place. 1508 if (!VT.isPow2VectorType()) 1509 return false; 1510 1511 return true; 1512 } 1513 1514 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const { 1515 return ::useRVVForFixedLengthVectorVT(VT, Subtarget); 1516 } 1517 1518 // Return the largest legal scalable vector type that matches VT's element type. 1519 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT, 1520 const RISCVSubtarget &Subtarget) { 1521 // This may be called before legal types are setup. 1522 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) || 1523 useRVVForFixedLengthVectorVT(VT, Subtarget)) && 1524 "Expected legal fixed length vector!"); 1525 1526 unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits(); 1527 unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors(); 1528 1529 MVT EltVT = VT.getVectorElementType(); 1530 switch (EltVT.SimpleTy) { 1531 default: 1532 llvm_unreachable("unexpected element type for RVV container"); 1533 case MVT::i1: 1534 case MVT::i8: 1535 case MVT::i16: 1536 case MVT::i32: 1537 case MVT::i64: 1538 case MVT::f16: 1539 case MVT::f32: 1540 case MVT::f64: { 1541 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for 1542 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within 1543 // each fractional LMUL we support SEW between 8 and LMUL*ELEN. 1544 unsigned NumElts = 1545 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen; 1546 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen); 1547 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts"); 1548 return MVT::getScalableVectorVT(EltVT, NumElts); 1549 } 1550 } 1551 } 1552 1553 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, 1554 const RISCVSubtarget &Subtarget) { 1555 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT, 1556 Subtarget); 1557 } 1558 1559 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const { 1560 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget()); 1561 } 1562 1563 // Grow V to consume an entire RVV register. 1564 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 1565 const RISCVSubtarget &Subtarget) { 1566 assert(VT.isScalableVector() && 1567 "Expected to convert into a scalable vector!"); 1568 assert(V.getValueType().isFixedLengthVector() && 1569 "Expected a fixed length vector operand!"); 1570 SDLoc DL(V); 1571 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1572 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); 1573 } 1574 1575 // Shrink V so it's just big enough to maintain a VT's worth of data. 1576 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, 1577 const RISCVSubtarget &Subtarget) { 1578 assert(VT.isFixedLengthVector() && 1579 "Expected to convert into a fixed length vector!"); 1580 assert(V.getValueType().isScalableVector() && 1581 "Expected a scalable vector operand!"); 1582 SDLoc DL(V); 1583 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 1584 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero); 1585 } 1586 1587 // Gets the two common "VL" operands: an all-ones mask and the vector length. 1588 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is 1589 // the vector type that it is contained in. 1590 static std::pair<SDValue, SDValue> 1591 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG, 1592 const RISCVSubtarget &Subtarget) { 1593 assert(ContainerVT.isScalableVector() && "Expecting scalable container type"); 1594 MVT XLenVT = Subtarget.getXLenVT(); 1595 SDValue VL = VecVT.isFixedLengthVector() 1596 ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT) 1597 : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1598 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 1599 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 1600 return {Mask, VL}; 1601 } 1602 1603 // As above but assuming the given type is a scalable vector type. 1604 static std::pair<SDValue, SDValue> 1605 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG, 1606 const RISCVSubtarget &Subtarget) { 1607 assert(VecVT.isScalableVector() && "Expecting a scalable vector"); 1608 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget); 1609 } 1610 1611 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few 1612 // of either is (currently) supported. This can get us into an infinite loop 1613 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR 1614 // as a ..., etc. 1615 // Until either (or both) of these can reliably lower any node, reporting that 1616 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks 1617 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack, 1618 // which is not desirable. 1619 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles( 1620 EVT VT, unsigned DefinedValues) const { 1621 return false; 1622 } 1623 1624 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { 1625 // Only splats are currently supported. 1626 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT)) 1627 return true; 1628 1629 return false; 1630 } 1631 1632 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) { 1633 // RISCV FP-to-int conversions saturate to the destination register size, but 1634 // don't produce 0 for nan. We can use a conversion instruction and fix the 1635 // nan case with a compare and a select. 1636 SDValue Src = Op.getOperand(0); 1637 1638 EVT DstVT = Op.getValueType(); 1639 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 1640 1641 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; 1642 unsigned Opc; 1643 if (SatVT == DstVT) 1644 Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ; 1645 else if (DstVT == MVT::i64 && SatVT == MVT::i32) 1646 Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64; 1647 else 1648 return SDValue(); 1649 // FIXME: Support other SatVTs by clamping before or after the conversion. 1650 1651 SDLoc DL(Op); 1652 SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src); 1653 1654 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT); 1655 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); 1656 } 1657 1658 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain 1659 // and back. Taking care to avoid converting values that are nan or already 1660 // correct. 1661 // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't 1662 // have FRM dependencies modeled yet. 1663 static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) { 1664 MVT VT = Op.getSimpleValueType(); 1665 assert(VT.isVector() && "Unexpected type"); 1666 1667 SDLoc DL(Op); 1668 1669 // Freeze the source since we are increasing the number of uses. 1670 SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0)); 1671 1672 // Truncate to integer and convert back to FP. 1673 MVT IntVT = VT.changeVectorElementTypeToInteger(); 1674 SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src); 1675 Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated); 1676 1677 MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 1678 1679 if (Op.getOpcode() == ISD::FCEIL) { 1680 // If the truncated value is the greater than or equal to the original 1681 // value, we've computed the ceil. Otherwise, we went the wrong way and 1682 // need to increase by 1. 1683 // FIXME: This should use a masked operation. Handle here or in isel? 1684 SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated, 1685 DAG.getConstantFP(1.0, DL, VT)); 1686 SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT); 1687 Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated); 1688 } else if (Op.getOpcode() == ISD::FFLOOR) { 1689 // If the truncated value is the less than or equal to the original value, 1690 // we've computed the floor. Otherwise, we went the wrong way and need to 1691 // decrease by 1. 1692 // FIXME: This should use a masked operation. Handle here or in isel? 1693 SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated, 1694 DAG.getConstantFP(1.0, DL, VT)); 1695 SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT); 1696 Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated); 1697 } 1698 1699 // Restore the original sign so that -0.0 is preserved. 1700 Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src); 1701 1702 // Determine the largest integer that can be represented exactly. This and 1703 // values larger than it don't have any fractional bits so don't need to 1704 // be converted. 1705 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 1706 unsigned Precision = APFloat::semanticsPrecision(FltSem); 1707 APFloat MaxVal = APFloat(FltSem); 1708 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), 1709 /*IsSigned*/ false, APFloat::rmNearestTiesToEven); 1710 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); 1711 1712 // If abs(Src) was larger than MaxVal or nan, keep it. 1713 SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src); 1714 SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT); 1715 return DAG.getSelect(DL, VT, Setcc, Truncated, Src); 1716 } 1717 1718 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG, 1719 const RISCVSubtarget &Subtarget) { 1720 MVT VT = Op.getSimpleValueType(); 1721 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 1722 1723 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 1724 1725 SDLoc DL(Op); 1726 SDValue Mask, VL; 1727 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1728 1729 unsigned Opc = 1730 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; 1731 SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL); 1732 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 1733 } 1734 1735 struct VIDSequence { 1736 int64_t StepNumerator; 1737 unsigned StepDenominator; 1738 int64_t Addend; 1739 }; 1740 1741 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S] 1742 // to the (non-zero) step S and start value X. This can be then lowered as the 1743 // RVV sequence (VID * S) + X, for example. 1744 // The step S is represented as an integer numerator divided by a positive 1745 // denominator. Note that the implementation currently only identifies 1746 // sequences in which either the numerator is +/- 1 or the denominator is 1. It 1747 // cannot detect 2/3, for example. 1748 // Note that this method will also match potentially unappealing index 1749 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to 1750 // determine whether this is worth generating code for. 1751 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) { 1752 unsigned NumElts = Op.getNumOperands(); 1753 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR"); 1754 if (!Op.getValueType().isInteger()) 1755 return None; 1756 1757 Optional<unsigned> SeqStepDenom; 1758 Optional<int64_t> SeqStepNum, SeqAddend; 1759 Optional<std::pair<uint64_t, unsigned>> PrevElt; 1760 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits(); 1761 for (unsigned Idx = 0; Idx < NumElts; Idx++) { 1762 // Assume undef elements match the sequence; we just have to be careful 1763 // when interpolating across them. 1764 if (Op.getOperand(Idx).isUndef()) 1765 continue; 1766 // The BUILD_VECTOR must be all constants. 1767 if (!isa<ConstantSDNode>(Op.getOperand(Idx))) 1768 return None; 1769 1770 uint64_t Val = Op.getConstantOperandVal(Idx) & 1771 maskTrailingOnes<uint64_t>(EltSizeInBits); 1772 1773 if (PrevElt) { 1774 // Calculate the step since the last non-undef element, and ensure 1775 // it's consistent across the entire sequence. 1776 unsigned IdxDiff = Idx - PrevElt->second; 1777 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits); 1778 1779 // A zero-value value difference means that we're somewhere in the middle 1780 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a 1781 // step change before evaluating the sequence. 1782 if (ValDiff != 0) { 1783 int64_t Remainder = ValDiff % IdxDiff; 1784 // Normalize the step if it's greater than 1. 1785 if (Remainder != ValDiff) { 1786 // The difference must cleanly divide the element span. 1787 if (Remainder != 0) 1788 return None; 1789 ValDiff /= IdxDiff; 1790 IdxDiff = 1; 1791 } 1792 1793 if (!SeqStepNum) 1794 SeqStepNum = ValDiff; 1795 else if (ValDiff != SeqStepNum) 1796 return None; 1797 1798 if (!SeqStepDenom) 1799 SeqStepDenom = IdxDiff; 1800 else if (IdxDiff != *SeqStepDenom) 1801 return None; 1802 } 1803 } 1804 1805 // Record and/or check any addend. 1806 if (SeqStepNum && SeqStepDenom) { 1807 uint64_t ExpectedVal = 1808 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; 1809 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); 1810 if (!SeqAddend) 1811 SeqAddend = Addend; 1812 else if (SeqAddend != Addend) 1813 return None; 1814 } 1815 1816 // Record this non-undef element for later. 1817 if (!PrevElt || PrevElt->first != Val) 1818 PrevElt = std::make_pair(Val, Idx); 1819 } 1820 // We need to have logged both a step and an addend for this to count as 1821 // a legal index sequence. 1822 if (!SeqStepNum || !SeqStepDenom || !SeqAddend) 1823 return None; 1824 1825 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; 1826 } 1827 1828 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 1829 const RISCVSubtarget &Subtarget) { 1830 MVT VT = Op.getSimpleValueType(); 1831 assert(VT.isFixedLengthVector() && "Unexpected vector!"); 1832 1833 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 1834 1835 SDLoc DL(Op); 1836 SDValue Mask, VL; 1837 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 1838 1839 MVT XLenVT = Subtarget.getXLenVT(); 1840 unsigned NumElts = Op.getNumOperands(); 1841 1842 if (VT.getVectorElementType() == MVT::i1) { 1843 if (ISD::isBuildVectorAllZeros(Op.getNode())) { 1844 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL); 1845 return convertFromScalableVector(VT, VMClr, DAG, Subtarget); 1846 } 1847 1848 if (ISD::isBuildVectorAllOnes(Op.getNode())) { 1849 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 1850 return convertFromScalableVector(VT, VMSet, DAG, Subtarget); 1851 } 1852 1853 // Lower constant mask BUILD_VECTORs via an integer vector type, in 1854 // scalar integer chunks whose bit-width depends on the number of mask 1855 // bits and XLEN. 1856 // First, determine the most appropriate scalar integer type to use. This 1857 // is at most XLenVT, but may be shrunk to a smaller vector element type 1858 // according to the size of the final vector - use i8 chunks rather than 1859 // XLenVT if we're producing a v8i1. This results in more consistent 1860 // codegen across RV32 and RV64. 1861 unsigned NumViaIntegerBits = 1862 std::min(std::max(NumElts, 8u), Subtarget.getXLen()); 1863 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { 1864 // If we have to use more than one INSERT_VECTOR_ELT then this 1865 // optimization is likely to increase code size; avoid peforming it in 1866 // such a case. We can use a load from a constant pool in this case. 1867 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits) 1868 return SDValue(); 1869 // Now we can create our integer vector type. Note that it may be larger 1870 // than the resulting mask type: v4i1 would use v1i8 as its integer type. 1871 MVT IntegerViaVecVT = 1872 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits), 1873 divideCeil(NumElts, NumViaIntegerBits)); 1874 1875 uint64_t Bits = 0; 1876 unsigned BitPos = 0, IntegerEltIdx = 0; 1877 SDValue Vec = DAG.getUNDEF(IntegerViaVecVT); 1878 1879 for (unsigned I = 0; I < NumElts; I++, BitPos++) { 1880 // Once we accumulate enough bits to fill our scalar type, insert into 1881 // our vector and clear our accumulated data. 1882 if (I != 0 && I % NumViaIntegerBits == 0) { 1883 if (NumViaIntegerBits <= 32) 1884 Bits = SignExtend64(Bits, 32); 1885 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); 1886 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, 1887 Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT)); 1888 Bits = 0; 1889 BitPos = 0; 1890 IntegerEltIdx++; 1891 } 1892 SDValue V = Op.getOperand(I); 1893 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue(); 1894 Bits |= ((uint64_t)BitValue << BitPos); 1895 } 1896 1897 // Insert the (remaining) scalar value into position in our integer 1898 // vector type. 1899 if (NumViaIntegerBits <= 32) 1900 Bits = SignExtend64(Bits, 32); 1901 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT); 1902 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt, 1903 DAG.getConstant(IntegerEltIdx, DL, XLenVT)); 1904 1905 if (NumElts < NumViaIntegerBits) { 1906 // If we're producing a smaller vector than our minimum legal integer 1907 // type, bitcast to the equivalent (known-legal) mask type, and extract 1908 // our final mask. 1909 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type"); 1910 Vec = DAG.getBitcast(MVT::v8i1, Vec); 1911 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec, 1912 DAG.getConstant(0, DL, XLenVT)); 1913 } else { 1914 // Else we must have produced an integer type with the same size as the 1915 // mask type; bitcast for the final result. 1916 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits()); 1917 Vec = DAG.getBitcast(VT, Vec); 1918 } 1919 1920 return Vec; 1921 } 1922 1923 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask 1924 // vector type, we have a legal equivalently-sized i8 type, so we can use 1925 // that. 1926 MVT WideVecVT = VT.changeVectorElementType(MVT::i8); 1927 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT); 1928 1929 SDValue WideVec; 1930 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 1931 // For a splat, perform a scalar truncate before creating the wider 1932 // vector. 1933 assert(Splat.getValueType() == XLenVT && 1934 "Unexpected type for i1 splat value"); 1935 Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat, 1936 DAG.getConstant(1, DL, XLenVT)); 1937 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat); 1938 } else { 1939 SmallVector<SDValue, 8> Ops(Op->op_values()); 1940 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops); 1941 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT); 1942 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne); 1943 } 1944 1945 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE); 1946 } 1947 1948 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { 1949 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL 1950 : RISCVISD::VMV_V_X_VL; 1951 Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL); 1952 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 1953 } 1954 1955 // Try and match index sequences, which we can lower to the vid instruction 1956 // with optional modifications. An all-undef vector is matched by 1957 // getSplatValue, above. 1958 if (auto SimpleVID = isSimpleVIDSequence(Op)) { 1959 int64_t StepNumerator = SimpleVID->StepNumerator; 1960 unsigned StepDenominator = SimpleVID->StepDenominator; 1961 int64_t Addend = SimpleVID->Addend; 1962 // Only emit VIDs with suitably-small steps/addends. We use imm5 is a 1963 // threshold since it's the immediate value many RVV instructions accept. 1964 if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) && 1965 isInt<5>(Addend)) { 1966 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); 1967 // Convert right out of the scalable type so we can use standard ISD 1968 // nodes for the rest of the computation. If we used scalable types with 1969 // these, we'd lose the fixed-length vector info and generate worse 1970 // vsetvli code. 1971 VID = convertFromScalableVector(VT, VID, DAG, Subtarget); 1972 assert(StepNumerator != 0 && "Invalid step"); 1973 bool Negate = false; 1974 if (StepNumerator != 1) { 1975 int64_t SplatStepVal = StepNumerator; 1976 unsigned Opcode = ISD::MUL; 1977 if (isPowerOf2_64(std::abs(StepNumerator))) { 1978 Negate = StepNumerator < 0; 1979 Opcode = ISD::SHL; 1980 SplatStepVal = Log2_64(std::abs(StepNumerator)); 1981 } 1982 SDValue SplatStep = DAG.getSplatVector( 1983 VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT)); 1984 VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep); 1985 } 1986 if (StepDenominator != 1) { 1987 SDValue SplatStep = DAG.getSplatVector( 1988 VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT)); 1989 VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep); 1990 } 1991 if (Addend != 0 || Negate) { 1992 SDValue SplatAddend = 1993 DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT)); 1994 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID); 1995 } 1996 return VID; 1997 } 1998 } 1999 2000 // Attempt to detect "hidden" splats, which only reveal themselves as splats 2001 // when re-interpreted as a vector with a larger element type. For example, 2002 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1 2003 // could be instead splat as 2004 // v2i32 = build_vector i32 0x00010000, i32 0x00010000 2005 // TODO: This optimization could also work on non-constant splats, but it 2006 // would require bit-manipulation instructions to construct the splat value. 2007 SmallVector<SDValue> Sequence; 2008 unsigned EltBitSize = VT.getScalarSizeInBits(); 2009 const auto *BV = cast<BuildVectorSDNode>(Op); 2010 if (VT.isInteger() && EltBitSize < 64 && 2011 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && 2012 BV->getRepeatedSequence(Sequence) && 2013 (Sequence.size() * EltBitSize) <= 64) { 2014 unsigned SeqLen = Sequence.size(); 2015 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); 2016 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen); 2017 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || 2018 ViaIntVT == MVT::i64) && 2019 "Unexpected sequence type"); 2020 2021 unsigned EltIdx = 0; 2022 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize); 2023 uint64_t SplatValue = 0; 2024 // Construct the amalgamated value which can be splatted as this larger 2025 // vector type. 2026 for (const auto &SeqV : Sequence) { 2027 if (!SeqV.isUndef()) 2028 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask) 2029 << (EltIdx * EltBitSize)); 2030 EltIdx++; 2031 } 2032 2033 // On RV64, sign-extend from 32 to 64 bits where possible in order to 2034 // achieve better constant materializion. 2035 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32) 2036 SplatValue = SignExtend64(SplatValue, 32); 2037 2038 // Since we can't introduce illegal i64 types at this stage, we can only 2039 // perform an i64 splat on RV32 if it is its own sign-extended value. That 2040 // way we can use RVV instructions to splat. 2041 assert((ViaIntVT.bitsLE(XLenVT) || 2042 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) && 2043 "Unexpected bitcast sequence"); 2044 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) { 2045 SDValue ViaVL = 2046 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT); 2047 MVT ViaContainerVT = 2048 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget); 2049 SDValue Splat = 2050 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT, 2051 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL); 2052 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget); 2053 return DAG.getBitcast(VT, Splat); 2054 } 2055 } 2056 2057 // Try and optimize BUILD_VECTORs with "dominant values" - these are values 2058 // which constitute a large proportion of the elements. In such cases we can 2059 // splat a vector with the dominant element and make up the shortfall with 2060 // INSERT_VECTOR_ELTs. 2061 // Note that this includes vectors of 2 elements by association. The 2062 // upper-most element is the "dominant" one, allowing us to use a splat to 2063 // "insert" the upper element, and an insert of the lower element at position 2064 // 0, which improves codegen. 2065 SDValue DominantValue; 2066 unsigned MostCommonCount = 0; 2067 DenseMap<SDValue, unsigned> ValueCounts; 2068 unsigned NumUndefElts = 2069 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); 2070 2071 // Track the number of scalar loads we know we'd be inserting, estimated as 2072 // any non-zero floating-point constant. Other kinds of element are either 2073 // already in registers or are materialized on demand. The threshold at which 2074 // a vector load is more desirable than several scalar materializion and 2075 // vector-insertion instructions is not known. 2076 unsigned NumScalarLoads = 0; 2077 2078 for (SDValue V : Op->op_values()) { 2079 if (V.isUndef()) 2080 continue; 2081 2082 ValueCounts.insert(std::make_pair(V, 0)); 2083 unsigned &Count = ValueCounts[V]; 2084 2085 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V)) 2086 NumScalarLoads += !CFP->isExactlyValue(+0.0); 2087 2088 // Is this value dominant? In case of a tie, prefer the highest element as 2089 // it's cheaper to insert near the beginning of a vector than it is at the 2090 // end. 2091 if (++Count >= MostCommonCount) { 2092 DominantValue = V; 2093 MostCommonCount = Count; 2094 } 2095 } 2096 2097 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR"); 2098 unsigned NumDefElts = NumElts - NumUndefElts; 2099 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2; 2100 2101 // Don't perform this optimization when optimizing for size, since 2102 // materializing elements and inserting them tends to cause code bloat. 2103 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && 2104 ((MostCommonCount > DominantValueCountThreshold) || 2105 (ValueCounts.size() <= Log2_32(NumDefElts)))) { 2106 // Start by splatting the most common element. 2107 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue); 2108 2109 DenseSet<SDValue> Processed{DominantValue}; 2110 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1); 2111 for (const auto &OpIdx : enumerate(Op->ops())) { 2112 const SDValue &V = OpIdx.value(); 2113 if (V.isUndef() || !Processed.insert(V).second) 2114 continue; 2115 if (ValueCounts[V] == 1) { 2116 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, 2117 DAG.getConstant(OpIdx.index(), DL, XLenVT)); 2118 } else { 2119 // Blend in all instances of this value using a VSELECT, using a 2120 // mask where each bit signals whether that element is the one 2121 // we're after. 2122 SmallVector<SDValue> Ops; 2123 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) { 2124 return DAG.getConstant(V == V1, DL, XLenVT); 2125 }); 2126 Vec = DAG.getNode(ISD::VSELECT, DL, VT, 2127 DAG.getBuildVector(SelMaskTy, DL, Ops), 2128 DAG.getSplatBuildVector(VT, DL, V), Vec); 2129 } 2130 } 2131 2132 return Vec; 2133 } 2134 2135 return SDValue(); 2136 } 2137 2138 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo, 2139 SDValue Hi, SDValue VL, SelectionDAG &DAG) { 2140 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 2141 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 2142 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 2143 // If Hi constant is all the same sign bit as Lo, lower this as a custom 2144 // node in order to try and match RVV vector/scalar instructions. 2145 if ((LoC >> 31) == HiC) 2146 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL); 2147 } 2148 2149 // Fall back to a stack store and stride x0 vector load. 2150 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL); 2151 } 2152 2153 // Called by type legalization to handle splat of i64 on RV32. 2154 // FIXME: We can optimize this when the type has sign or zero bits in one 2155 // of the halves. 2156 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar, 2157 SDValue VL, SelectionDAG &DAG) { 2158 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!"); 2159 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2160 DAG.getConstant(0, DL, MVT::i32)); 2161 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 2162 DAG.getConstant(1, DL, MVT::i32)); 2163 return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG); 2164 } 2165 2166 // This function lowers a splat of a scalar operand Splat with the vector 2167 // length VL. It ensures the final sequence is type legal, which is useful when 2168 // lowering a splat after type legalization. 2169 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL, 2170 SelectionDAG &DAG, 2171 const RISCVSubtarget &Subtarget) { 2172 if (VT.isFloatingPoint()) 2173 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL); 2174 2175 MVT XLenVT = Subtarget.getXLenVT(); 2176 2177 // Simplest case is that the operand needs to be promoted to XLenVT. 2178 if (Scalar.getValueType().bitsLE(XLenVT)) { 2179 // If the operand is a constant, sign extend to increase our chances 2180 // of being able to use a .vi instruction. ANY_EXTEND would become a 2181 // a zero extend and the simm5 check in isel would fail. 2182 // FIXME: Should we ignore the upper bits in isel instead? 2183 unsigned ExtOpc = 2184 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 2185 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar); 2186 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL); 2187 } 2188 2189 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 && 2190 "Unexpected scalar for splat lowering!"); 2191 2192 // Otherwise use the more complicated splatting algorithm. 2193 return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); 2194 } 2195 2196 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, 2197 const RISCVSubtarget &Subtarget) { 2198 SDValue V1 = Op.getOperand(0); 2199 SDValue V2 = Op.getOperand(1); 2200 SDLoc DL(Op); 2201 MVT XLenVT = Subtarget.getXLenVT(); 2202 MVT VT = Op.getSimpleValueType(); 2203 unsigned NumElts = VT.getVectorNumElements(); 2204 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 2205 2206 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); 2207 2208 SDValue TrueMask, VL; 2209 std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2210 2211 if (SVN->isSplat()) { 2212 const int Lane = SVN->getSplatIndex(); 2213 if (Lane >= 0) { 2214 MVT SVT = VT.getVectorElementType(); 2215 2216 // Turn splatted vector load into a strided load with an X0 stride. 2217 SDValue V = V1; 2218 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector 2219 // with undef. 2220 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts? 2221 int Offset = Lane; 2222 if (V.getOpcode() == ISD::CONCAT_VECTORS) { 2223 int OpElements = 2224 V.getOperand(0).getSimpleValueType().getVectorNumElements(); 2225 V = V.getOperand(Offset / OpElements); 2226 Offset %= OpElements; 2227 } 2228 2229 // We need to ensure the load isn't atomic or volatile. 2230 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) { 2231 auto *Ld = cast<LoadSDNode>(V); 2232 Offset *= SVT.getStoreSize(); 2233 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), 2234 TypeSize::Fixed(Offset), DL); 2235 2236 // If this is SEW=64 on RV32, use a strided load with a stride of x0. 2237 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) { 2238 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 2239 SDValue IntID = 2240 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT); 2241 SDValue Ops[] = {Ld->getChain(), IntID, NewAddr, 2242 DAG.getRegister(RISCV::X0, XLenVT), VL}; 2243 SDValue NewLoad = DAG.getMemIntrinsicNode( 2244 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT, 2245 DAG.getMachineFunction().getMachineMemOperand( 2246 Ld->getMemOperand(), Offset, SVT.getStoreSize())); 2247 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); 2248 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 2249 } 2250 2251 // Otherwise use a scalar load and splat. This will give the best 2252 // opportunity to fold a splat into the operation. ISel can turn it into 2253 // the x0 strided load if we aren't able to fold away the select. 2254 if (SVT.isFloatingPoint()) 2255 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, 2256 Ld->getPointerInfo().getWithOffset(Offset), 2257 Ld->getOriginalAlign(), 2258 Ld->getMemOperand()->getFlags()); 2259 else 2260 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr, 2261 Ld->getPointerInfo().getWithOffset(Offset), SVT, 2262 Ld->getOriginalAlign(), 2263 Ld->getMemOperand()->getFlags()); 2264 DAG.makeEquivalentMemoryOrdering(Ld, V); 2265 2266 unsigned Opc = 2267 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; 2268 SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL); 2269 return convertFromScalableVector(VT, Splat, DAG, Subtarget); 2270 } 2271 2272 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 2273 assert(Lane < (int)NumElts && "Unexpected lane!"); 2274 SDValue Gather = 2275 DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1, 2276 DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL); 2277 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 2278 } 2279 } 2280 2281 // Detect shuffles which can be re-expressed as vector selects; these are 2282 // shuffles in which each element in the destination is taken from an element 2283 // at the corresponding index in either source vectors. 2284 bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) { 2285 int MaskIndex = MaskIdx.value(); 2286 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts; 2287 }); 2288 2289 assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); 2290 2291 SmallVector<SDValue> MaskVals; 2292 // As a backup, shuffles can be lowered via a vrgather instruction, possibly 2293 // merged with a second vrgather. 2294 SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; 2295 2296 // By default we preserve the original operand order, and use a mask to 2297 // select LHS as true and RHS as false. However, since RVV vector selects may 2298 // feature splats but only on the LHS, we may choose to invert our mask and 2299 // instead select between RHS and LHS. 2300 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); 2301 bool InvertMask = IsSelect == SwapOps; 2302 2303 // Keep a track of which non-undef indices are used by each LHS/RHS shuffle 2304 // half. 2305 DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; 2306 2307 // Now construct the mask that will be used by the vselect or blended 2308 // vrgather operation. For vrgathers, construct the appropriate indices into 2309 // each vector. 2310 for (int MaskIndex : SVN->getMask()) { 2311 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; 2312 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); 2313 if (!IsSelect) { 2314 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; 2315 GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 2316 ? DAG.getConstant(MaskIndex, DL, XLenVT) 2317 : DAG.getUNDEF(XLenVT)); 2318 GatherIndicesRHS.push_back( 2319 IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) 2320 : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); 2321 if (IsLHSOrUndefIndex && MaskIndex >= 0) 2322 ++LHSIndexCounts[MaskIndex]; 2323 if (!IsLHSOrUndefIndex) 2324 ++RHSIndexCounts[MaskIndex - NumElts]; 2325 } 2326 } 2327 2328 if (SwapOps) { 2329 std::swap(V1, V2); 2330 std::swap(GatherIndicesLHS, GatherIndicesRHS); 2331 } 2332 2333 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); 2334 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); 2335 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); 2336 2337 if (IsSelect) 2338 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); 2339 2340 if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) { 2341 // On such a large vector we're unable to use i8 as the index type. 2342 // FIXME: We could promote the index to i16 and use vrgatherei16, but that 2343 // may involve vector splitting if we're already at LMUL=8, or our 2344 // user-supplied maximum fixed-length LMUL. 2345 return SDValue(); 2346 } 2347 2348 unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; 2349 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; 2350 MVT IndexVT = VT.changeTypeToInteger(); 2351 // Since we can't introduce illegal index types at this stage, use i16 and 2352 // vrgatherei16 if the corresponding index type for plain vrgather is greater 2353 // than XLenVT. 2354 if (IndexVT.getScalarType().bitsGT(XLenVT)) { 2355 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL; 2356 IndexVT = IndexVT.changeVectorElementType(MVT::i16); 2357 } 2358 2359 MVT IndexContainerVT = 2360 ContainerVT.changeVectorElementType(IndexVT.getScalarType()); 2361 2362 SDValue Gather; 2363 // TODO: This doesn't trigger for i64 vectors on RV32, since there we 2364 // encounter a bitcasted BUILD_VECTOR with low/high i32 values. 2365 if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { 2366 Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget); 2367 } else { 2368 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); 2369 // If only one index is used, we can use a "splat" vrgather. 2370 // TODO: We can splat the most-common index and fix-up any stragglers, if 2371 // that's beneficial. 2372 if (LHSIndexCounts.size() == 1) { 2373 int SplatIndex = LHSIndexCounts.begin()->getFirst(); 2374 Gather = 2375 DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, 2376 DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL); 2377 } else { 2378 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); 2379 LHSIndices = 2380 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); 2381 2382 Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, 2383 TrueMask, VL); 2384 } 2385 } 2386 2387 // If a second vector operand is used by this shuffle, blend it in with an 2388 // additional vrgather. 2389 if (!V2.isUndef()) { 2390 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); 2391 // If only one index is used, we can use a "splat" vrgather. 2392 // TODO: We can splat the most-common index and fix-up any stragglers, if 2393 // that's beneficial. 2394 if (RHSIndexCounts.size() == 1) { 2395 int SplatIndex = RHSIndexCounts.begin()->getFirst(); 2396 V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, 2397 DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL); 2398 } else { 2399 SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); 2400 RHSIndices = 2401 convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); 2402 V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, 2403 VL); 2404 } 2405 2406 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 2407 SelectMask = 2408 convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); 2409 2410 Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2, 2411 Gather, VL); 2412 } 2413 2414 return convertFromScalableVector(VT, Gather, DAG, Subtarget); 2415 } 2416 2417 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT, 2418 SDLoc DL, SelectionDAG &DAG, 2419 const RISCVSubtarget &Subtarget) { 2420 if (VT.isScalableVector()) 2421 return DAG.getFPExtendOrRound(Op, DL, VT); 2422 assert(VT.isFixedLengthVector() && 2423 "Unexpected value type for RVV FP extend/round lowering"); 2424 SDValue Mask, VL; 2425 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2426 unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType()) 2427 ? RISCVISD::FP_EXTEND_VL 2428 : RISCVISD::FP_ROUND_VL; 2429 return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL); 2430 } 2431 2432 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting 2433 // the exponent. 2434 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { 2435 MVT VT = Op.getSimpleValueType(); 2436 unsigned EltSize = VT.getScalarSizeInBits(); 2437 SDValue Src = Op.getOperand(0); 2438 SDLoc DL(Op); 2439 2440 // We need a FP type that can represent the value. 2441 // TODO: Use f16 for i8 when possible? 2442 MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32; 2443 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount()); 2444 2445 // Legal types should have been checked in the RISCVTargetLowering 2446 // constructor. 2447 // TODO: Splitting may make sense in some cases. 2448 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) && 2449 "Expected legal float type!"); 2450 2451 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X. 2452 // The trailing zero count is equal to log2 of this single bit value. 2453 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { 2454 SDValue Neg = 2455 DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src); 2456 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg); 2457 } 2458 2459 // We have a legal FP type, convert to it. 2460 SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src); 2461 // Bitcast to integer and shift the exponent to the LSB. 2462 EVT IntVT = FloatVT.changeVectorElementTypeToInteger(); 2463 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal); 2464 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23; 2465 SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast, 2466 DAG.getConstant(ShiftAmt, DL, IntVT)); 2467 // Truncate back to original type to allow vnsrl. 2468 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift); 2469 // The exponent contains log2 of the value in biased form. 2470 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127; 2471 2472 // For trailing zeros, we just need to subtract the bias. 2473 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) 2474 return DAG.getNode(ISD::SUB, DL, VT, Trunc, 2475 DAG.getConstant(ExponentBias, DL, VT)); 2476 2477 // For leading zeros, we need to remove the bias and convert from log2 to 2478 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)). 2479 unsigned Adjust = ExponentBias + (EltSize - 1); 2480 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc); 2481 } 2482 2483 // While RVV has alignment restrictions, we should always be able to load as a 2484 // legal equivalently-sized byte-typed vector instead. This method is 2485 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If 2486 // the load is already correctly-aligned, it returns SDValue(). 2487 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op, 2488 SelectionDAG &DAG) const { 2489 auto *Load = cast<LoadSDNode>(Op); 2490 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load"); 2491 2492 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 2493 Load->getMemoryVT(), 2494 *Load->getMemOperand())) 2495 return SDValue(); 2496 2497 SDLoc DL(Op); 2498 MVT VT = Op.getSimpleValueType(); 2499 unsigned EltSizeBits = VT.getScalarSizeInBits(); 2500 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 2501 "Unexpected unaligned RVV load type"); 2502 MVT NewVT = 2503 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 2504 assert(NewVT.isValid() && 2505 "Expecting equally-sized RVV vector types to be legal"); 2506 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(), 2507 Load->getPointerInfo(), Load->getOriginalAlign(), 2508 Load->getMemOperand()->getFlags()); 2509 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL); 2510 } 2511 2512 // While RVV has alignment restrictions, we should always be able to store as a 2513 // legal equivalently-sized byte-typed vector instead. This method is 2514 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It 2515 // returns SDValue() if the store is already correctly aligned. 2516 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op, 2517 SelectionDAG &DAG) const { 2518 auto *Store = cast<StoreSDNode>(Op); 2519 assert(Store && Store->getValue().getValueType().isVector() && 2520 "Expected vector store"); 2521 2522 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 2523 Store->getMemoryVT(), 2524 *Store->getMemOperand())) 2525 return SDValue(); 2526 2527 SDLoc DL(Op); 2528 SDValue StoredVal = Store->getValue(); 2529 MVT VT = StoredVal.getSimpleValueType(); 2530 unsigned EltSizeBits = VT.getScalarSizeInBits(); 2531 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) && 2532 "Unexpected unaligned RVV store type"); 2533 MVT NewVT = 2534 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8)); 2535 assert(NewVT.isValid() && 2536 "Expecting equally-sized RVV vector types to be legal"); 2537 StoredVal = DAG.getBitcast(NewVT, StoredVal); 2538 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(), 2539 Store->getPointerInfo(), Store->getOriginalAlign(), 2540 Store->getMemOperand()->getFlags()); 2541 } 2542 2543 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 2544 SelectionDAG &DAG) const { 2545 switch (Op.getOpcode()) { 2546 default: 2547 report_fatal_error("unimplemented operand"); 2548 case ISD::GlobalAddress: 2549 return lowerGlobalAddress(Op, DAG); 2550 case ISD::BlockAddress: 2551 return lowerBlockAddress(Op, DAG); 2552 case ISD::ConstantPool: 2553 return lowerConstantPool(Op, DAG); 2554 case ISD::JumpTable: 2555 return lowerJumpTable(Op, DAG); 2556 case ISD::GlobalTLSAddress: 2557 return lowerGlobalTLSAddress(Op, DAG); 2558 case ISD::SELECT: 2559 return lowerSELECT(Op, DAG); 2560 case ISD::BRCOND: 2561 return lowerBRCOND(Op, DAG); 2562 case ISD::VASTART: 2563 return lowerVASTART(Op, DAG); 2564 case ISD::FRAMEADDR: 2565 return lowerFRAMEADDR(Op, DAG); 2566 case ISD::RETURNADDR: 2567 return lowerRETURNADDR(Op, DAG); 2568 case ISD::SHL_PARTS: 2569 return lowerShiftLeftParts(Op, DAG); 2570 case ISD::SRA_PARTS: 2571 return lowerShiftRightParts(Op, DAG, true); 2572 case ISD::SRL_PARTS: 2573 return lowerShiftRightParts(Op, DAG, false); 2574 case ISD::BITCAST: { 2575 SDLoc DL(Op); 2576 EVT VT = Op.getValueType(); 2577 SDValue Op0 = Op.getOperand(0); 2578 EVT Op0VT = Op0.getValueType(); 2579 MVT XLenVT = Subtarget.getXLenVT(); 2580 if (VT.isFixedLengthVector()) { 2581 // We can handle fixed length vector bitcasts with a simple replacement 2582 // in isel. 2583 if (Op0VT.isFixedLengthVector()) 2584 return Op; 2585 // When bitcasting from scalar to fixed-length vector, insert the scalar 2586 // into a one-element vector of the result type, and perform a vector 2587 // bitcast. 2588 if (!Op0VT.isVector()) { 2589 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1); 2590 if (!isTypeLegal(BVT)) 2591 return SDValue(); 2592 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, 2593 DAG.getUNDEF(BVT), Op0, 2594 DAG.getConstant(0, DL, XLenVT))); 2595 } 2596 return SDValue(); 2597 } 2598 // Custom-legalize bitcasts from fixed-length vector types to scalar types 2599 // thus: bitcast the vector to a one-element vector type whose element type 2600 // is the same as the result type, and extract the first element. 2601 if (!VT.isVector() && Op0VT.isFixedLengthVector()) { 2602 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 2603 if (!isTypeLegal(BVT)) 2604 return SDValue(); 2605 SDValue BVec = DAG.getBitcast(BVT, Op0); 2606 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 2607 DAG.getConstant(0, DL, XLenVT)); 2608 } 2609 if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) { 2610 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); 2611 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); 2612 return FPConv; 2613 } 2614 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() && 2615 Subtarget.hasStdExtF()) { 2616 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 2617 SDValue FPConv = 2618 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 2619 return FPConv; 2620 } 2621 return SDValue(); 2622 } 2623 case ISD::INTRINSIC_WO_CHAIN: 2624 return LowerINTRINSIC_WO_CHAIN(Op, DAG); 2625 case ISD::INTRINSIC_W_CHAIN: 2626 return LowerINTRINSIC_W_CHAIN(Op, DAG); 2627 case ISD::INTRINSIC_VOID: 2628 return LowerINTRINSIC_VOID(Op, DAG); 2629 case ISD::BSWAP: 2630 case ISD::BITREVERSE: { 2631 // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining. 2632 assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 2633 MVT VT = Op.getSimpleValueType(); 2634 SDLoc DL(Op); 2635 // Start with the maximum immediate value which is the bitwidth - 1. 2636 unsigned Imm = VT.getSizeInBits() - 1; 2637 // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. 2638 if (Op.getOpcode() == ISD::BSWAP) 2639 Imm &= ~0x7U; 2640 return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0), 2641 DAG.getConstant(Imm, DL, VT)); 2642 } 2643 case ISD::FSHL: 2644 case ISD::FSHR: { 2645 MVT VT = Op.getSimpleValueType(); 2646 assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization"); 2647 SDLoc DL(Op); 2648 if (Op.getOperand(2).getOpcode() == ISD::Constant) 2649 return Op; 2650 // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only 2651 // use log(XLen) bits. Mask the shift amount accordingly. 2652 unsigned ShAmtWidth = Subtarget.getXLen() - 1; 2653 SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2), 2654 DAG.getConstant(ShAmtWidth, DL, VT)); 2655 unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR; 2656 return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt); 2657 } 2658 case ISD::TRUNCATE: { 2659 SDLoc DL(Op); 2660 MVT VT = Op.getSimpleValueType(); 2661 // Only custom-lower vector truncates 2662 if (!VT.isVector()) 2663 return Op; 2664 2665 // Truncates to mask types are handled differently 2666 if (VT.getVectorElementType() == MVT::i1) 2667 return lowerVectorMaskTrunc(Op, DAG); 2668 2669 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary 2670 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which 2671 // truncate by one power of two at a time. 2672 MVT DstEltVT = VT.getVectorElementType(); 2673 2674 SDValue Src = Op.getOperand(0); 2675 MVT SrcVT = Src.getSimpleValueType(); 2676 MVT SrcEltVT = SrcVT.getVectorElementType(); 2677 2678 assert(DstEltVT.bitsLT(SrcEltVT) && 2679 isPowerOf2_64(DstEltVT.getSizeInBits()) && 2680 isPowerOf2_64(SrcEltVT.getSizeInBits()) && 2681 "Unexpected vector truncate lowering"); 2682 2683 MVT ContainerVT = SrcVT; 2684 if (SrcVT.isFixedLengthVector()) { 2685 ContainerVT = getContainerForFixedLengthVector(SrcVT); 2686 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 2687 } 2688 2689 SDValue Result = Src; 2690 SDValue Mask, VL; 2691 std::tie(Mask, VL) = 2692 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget); 2693 LLVMContext &Context = *DAG.getContext(); 2694 const ElementCount Count = ContainerVT.getVectorElementCount(); 2695 do { 2696 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2); 2697 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count); 2698 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result, 2699 Mask, VL); 2700 } while (SrcEltVT != DstEltVT); 2701 2702 if (SrcVT.isFixedLengthVector()) 2703 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 2704 2705 return Result; 2706 } 2707 case ISD::ANY_EXTEND: 2708 case ISD::ZERO_EXTEND: 2709 if (Op.getOperand(0).getValueType().isVector() && 2710 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 2711 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1); 2712 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL); 2713 case ISD::SIGN_EXTEND: 2714 if (Op.getOperand(0).getValueType().isVector() && 2715 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 2716 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1); 2717 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL); 2718 case ISD::SPLAT_VECTOR_PARTS: 2719 return lowerSPLAT_VECTOR_PARTS(Op, DAG); 2720 case ISD::INSERT_VECTOR_ELT: 2721 return lowerINSERT_VECTOR_ELT(Op, DAG); 2722 case ISD::EXTRACT_VECTOR_ELT: 2723 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 2724 case ISD::VSCALE: { 2725 MVT VT = Op.getSimpleValueType(); 2726 SDLoc DL(Op); 2727 SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT); 2728 // We define our scalable vector types for lmul=1 to use a 64 bit known 2729 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate 2730 // vscale as VLENB / 8. 2731 assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!"); 2732 if (isa<ConstantSDNode>(Op.getOperand(0))) { 2733 // We assume VLENB is a multiple of 8. We manually choose the best shift 2734 // here because SimplifyDemandedBits isn't always able to simplify it. 2735 uint64_t Val = Op.getConstantOperandVal(0); 2736 if (isPowerOf2_64(Val)) { 2737 uint64_t Log2 = Log2_64(Val); 2738 if (Log2 < 3) 2739 return DAG.getNode(ISD::SRL, DL, VT, VLENB, 2740 DAG.getConstant(3 - Log2, DL, VT)); 2741 if (Log2 > 3) 2742 return DAG.getNode(ISD::SHL, DL, VT, VLENB, 2743 DAG.getConstant(Log2 - 3, DL, VT)); 2744 return VLENB; 2745 } 2746 // If the multiplier is a multiple of 8, scale it down to avoid needing 2747 // to shift the VLENB value. 2748 if ((Val % 8) == 0) 2749 return DAG.getNode(ISD::MUL, DL, VT, VLENB, 2750 DAG.getConstant(Val / 8, DL, VT)); 2751 } 2752 2753 SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB, 2754 DAG.getConstant(3, DL, VT)); 2755 return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); 2756 } 2757 case ISD::FPOWI: { 2758 // Custom promote f16 powi with illegal i32 integer type on RV64. Once 2759 // promoted this will be legalized into a libcall by LegalizeIntegerTypes. 2760 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && 2761 Op.getOperand(1).getValueType() == MVT::i32) { 2762 SDLoc DL(Op); 2763 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); 2764 SDValue Powi = 2765 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); 2766 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, 2767 DAG.getIntPtrConstant(0, DL)); 2768 } 2769 return SDValue(); 2770 } 2771 case ISD::FP_EXTEND: { 2772 // RVV can only do fp_extend to types double the size as the source. We 2773 // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going 2774 // via f32. 2775 SDLoc DL(Op); 2776 MVT VT = Op.getSimpleValueType(); 2777 SDValue Src = Op.getOperand(0); 2778 MVT SrcVT = Src.getSimpleValueType(); 2779 2780 // Prepare any fixed-length vector operands. 2781 MVT ContainerVT = VT; 2782 if (SrcVT.isFixedLengthVector()) { 2783 ContainerVT = getContainerForFixedLengthVector(VT); 2784 MVT SrcContainerVT = 2785 ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); 2786 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 2787 } 2788 2789 if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 || 2790 SrcVT.getVectorElementType() != MVT::f16) { 2791 // For scalable vectors, we only need to close the gap between 2792 // vXf16->vXf64. 2793 if (!VT.isFixedLengthVector()) 2794 return Op; 2795 // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version. 2796 Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); 2797 return convertFromScalableVector(VT, Src, DAG, Subtarget); 2798 } 2799 2800 MVT InterVT = VT.changeVectorElementType(MVT::f32); 2801 MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32); 2802 SDValue IntermediateExtend = getRVVFPExtendOrRound( 2803 Src, InterVT, InterContainerVT, DL, DAG, Subtarget); 2804 2805 SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT, 2806 DL, DAG, Subtarget); 2807 if (VT.isFixedLengthVector()) 2808 return convertFromScalableVector(VT, Extend, DAG, Subtarget); 2809 return Extend; 2810 } 2811 case ISD::FP_ROUND: { 2812 // RVV can only do fp_round to types half the size as the source. We 2813 // custom-lower f64->f16 rounds via RVV's round-to-odd float 2814 // conversion instruction. 2815 SDLoc DL(Op); 2816 MVT VT = Op.getSimpleValueType(); 2817 SDValue Src = Op.getOperand(0); 2818 MVT SrcVT = Src.getSimpleValueType(); 2819 2820 // Prepare any fixed-length vector operands. 2821 MVT ContainerVT = VT; 2822 if (VT.isFixedLengthVector()) { 2823 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 2824 ContainerVT = 2825 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 2826 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 2827 } 2828 2829 if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || 2830 SrcVT.getVectorElementType() != MVT::f64) { 2831 // For scalable vectors, we only need to close the gap between 2832 // vXf64<->vXf16. 2833 if (!VT.isFixedLengthVector()) 2834 return Op; 2835 // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version. 2836 Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget); 2837 return convertFromScalableVector(VT, Src, DAG, Subtarget); 2838 } 2839 2840 SDValue Mask, VL; 2841 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2842 2843 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32); 2844 SDValue IntermediateRound = 2845 DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL); 2846 SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT, 2847 DL, DAG, Subtarget); 2848 2849 if (VT.isFixedLengthVector()) 2850 return convertFromScalableVector(VT, Round, DAG, Subtarget); 2851 return Round; 2852 } 2853 case ISD::FP_TO_SINT: 2854 case ISD::FP_TO_UINT: 2855 case ISD::SINT_TO_FP: 2856 case ISD::UINT_TO_FP: { 2857 // RVV can only do fp<->int conversions to types half/double the size as 2858 // the source. We custom-lower any conversions that do two hops into 2859 // sequences. 2860 MVT VT = Op.getSimpleValueType(); 2861 if (!VT.isVector()) 2862 return Op; 2863 SDLoc DL(Op); 2864 SDValue Src = Op.getOperand(0); 2865 MVT EltVT = VT.getVectorElementType(); 2866 MVT SrcVT = Src.getSimpleValueType(); 2867 MVT SrcEltVT = SrcVT.getVectorElementType(); 2868 unsigned EltSize = EltVT.getSizeInBits(); 2869 unsigned SrcEltSize = SrcEltVT.getSizeInBits(); 2870 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) && 2871 "Unexpected vector element types"); 2872 2873 bool IsInt2FP = SrcEltVT.isInteger(); 2874 // Widening conversions 2875 if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) { 2876 if (IsInt2FP) { 2877 // Do a regular integer sign/zero extension then convert to float. 2878 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()), 2879 VT.getVectorElementCount()); 2880 unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP 2881 ? ISD::ZERO_EXTEND 2882 : ISD::SIGN_EXTEND; 2883 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src); 2884 return DAG.getNode(Op.getOpcode(), DL, VT, Ext); 2885 } 2886 // FP2Int 2887 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering"); 2888 // Do one doubling fp_extend then complete the operation by converting 2889 // to int. 2890 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 2891 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT); 2892 return DAG.getNode(Op.getOpcode(), DL, VT, FExt); 2893 } 2894 2895 // Narrowing conversions 2896 if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) { 2897 if (IsInt2FP) { 2898 // One narrowing int_to_fp, then an fp_round. 2899 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering"); 2900 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount()); 2901 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src); 2902 return DAG.getFPExtendOrRound(Int2FP, DL, VT); 2903 } 2904 // FP2Int 2905 // One narrowing fp_to_int, then truncate the integer. If the float isn't 2906 // representable by the integer, the result is poison. 2907 MVT IVecVT = 2908 MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2), 2909 VT.getVectorElementCount()); 2910 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src); 2911 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int); 2912 } 2913 2914 // Scalable vectors can exit here. Patterns will handle equally-sized 2915 // conversions halving/doubling ones. 2916 if (!VT.isFixedLengthVector()) 2917 return Op; 2918 2919 // For fixed-length vectors we lower to a custom "VL" node. 2920 unsigned RVVOpc = 0; 2921 switch (Op.getOpcode()) { 2922 default: 2923 llvm_unreachable("Impossible opcode"); 2924 case ISD::FP_TO_SINT: 2925 RVVOpc = RISCVISD::FP_TO_SINT_VL; 2926 break; 2927 case ISD::FP_TO_UINT: 2928 RVVOpc = RISCVISD::FP_TO_UINT_VL; 2929 break; 2930 case ISD::SINT_TO_FP: 2931 RVVOpc = RISCVISD::SINT_TO_FP_VL; 2932 break; 2933 case ISD::UINT_TO_FP: 2934 RVVOpc = RISCVISD::UINT_TO_FP_VL; 2935 break; 2936 } 2937 2938 MVT ContainerVT, SrcContainerVT; 2939 // Derive the reference container type from the larger vector type. 2940 if (SrcEltSize > EltSize) { 2941 SrcContainerVT = getContainerForFixedLengthVector(SrcVT); 2942 ContainerVT = 2943 SrcContainerVT.changeVectorElementType(VT.getVectorElementType()); 2944 } else { 2945 ContainerVT = getContainerForFixedLengthVector(VT); 2946 SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT); 2947 } 2948 2949 SDValue Mask, VL; 2950 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 2951 2952 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget); 2953 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL); 2954 return convertFromScalableVector(VT, Src, DAG, Subtarget); 2955 } 2956 case ISD::FP_TO_SINT_SAT: 2957 case ISD::FP_TO_UINT_SAT: 2958 return lowerFP_TO_INT_SAT(Op, DAG); 2959 case ISD::FTRUNC: 2960 case ISD::FCEIL: 2961 case ISD::FFLOOR: 2962 return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG); 2963 case ISD::VECREDUCE_ADD: 2964 case ISD::VECREDUCE_UMAX: 2965 case ISD::VECREDUCE_SMAX: 2966 case ISD::VECREDUCE_UMIN: 2967 case ISD::VECREDUCE_SMIN: 2968 return lowerVECREDUCE(Op, DAG); 2969 case ISD::VECREDUCE_AND: 2970 case ISD::VECREDUCE_OR: 2971 case ISD::VECREDUCE_XOR: 2972 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1) 2973 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false); 2974 return lowerVECREDUCE(Op, DAG); 2975 case ISD::VECREDUCE_FADD: 2976 case ISD::VECREDUCE_SEQ_FADD: 2977 case ISD::VECREDUCE_FMIN: 2978 case ISD::VECREDUCE_FMAX: 2979 return lowerFPVECREDUCE(Op, DAG); 2980 case ISD::VP_REDUCE_ADD: 2981 case ISD::VP_REDUCE_UMAX: 2982 case ISD::VP_REDUCE_SMAX: 2983 case ISD::VP_REDUCE_UMIN: 2984 case ISD::VP_REDUCE_SMIN: 2985 case ISD::VP_REDUCE_FADD: 2986 case ISD::VP_REDUCE_SEQ_FADD: 2987 case ISD::VP_REDUCE_FMIN: 2988 case ISD::VP_REDUCE_FMAX: 2989 return lowerVPREDUCE(Op, DAG); 2990 case ISD::VP_REDUCE_AND: 2991 case ISD::VP_REDUCE_OR: 2992 case ISD::VP_REDUCE_XOR: 2993 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1) 2994 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true); 2995 return lowerVPREDUCE(Op, DAG); 2996 case ISD::INSERT_SUBVECTOR: 2997 return lowerINSERT_SUBVECTOR(Op, DAG); 2998 case ISD::EXTRACT_SUBVECTOR: 2999 return lowerEXTRACT_SUBVECTOR(Op, DAG); 3000 case ISD::STEP_VECTOR: 3001 return lowerSTEP_VECTOR(Op, DAG); 3002 case ISD::VECTOR_REVERSE: 3003 return lowerVECTOR_REVERSE(Op, DAG); 3004 case ISD::BUILD_VECTOR: 3005 return lowerBUILD_VECTOR(Op, DAG, Subtarget); 3006 case ISD::SPLAT_VECTOR: 3007 if (Op.getValueType().getVectorElementType() == MVT::i1) 3008 return lowerVectorMaskSplat(Op, DAG); 3009 return lowerSPLAT_VECTOR(Op, DAG, Subtarget); 3010 case ISD::VECTOR_SHUFFLE: 3011 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget); 3012 case ISD::CONCAT_VECTORS: { 3013 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is 3014 // better than going through the stack, as the default expansion does. 3015 SDLoc DL(Op); 3016 MVT VT = Op.getSimpleValueType(); 3017 unsigned NumOpElts = 3018 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements(); 3019 SDValue Vec = DAG.getUNDEF(VT); 3020 for (const auto &OpIdx : enumerate(Op->ops())) 3021 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(), 3022 DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL)); 3023 return Vec; 3024 } 3025 case ISD::LOAD: 3026 if (auto V = expandUnalignedRVVLoad(Op, DAG)) 3027 return V; 3028 if (Op.getValueType().isFixedLengthVector()) 3029 return lowerFixedLengthVectorLoadToRVV(Op, DAG); 3030 return Op; 3031 case ISD::STORE: 3032 if (auto V = expandUnalignedRVVStore(Op, DAG)) 3033 return V; 3034 if (Op.getOperand(1).getValueType().isFixedLengthVector()) 3035 return lowerFixedLengthVectorStoreToRVV(Op, DAG); 3036 return Op; 3037 case ISD::MLOAD: 3038 case ISD::VP_LOAD: 3039 return lowerMaskedLoad(Op, DAG); 3040 case ISD::MSTORE: 3041 case ISD::VP_STORE: 3042 return lowerMaskedStore(Op, DAG); 3043 case ISD::SETCC: 3044 return lowerFixedLengthVectorSetccToRVV(Op, DAG); 3045 case ISD::ADD: 3046 return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL); 3047 case ISD::SUB: 3048 return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL); 3049 case ISD::MUL: 3050 return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL); 3051 case ISD::MULHS: 3052 return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL); 3053 case ISD::MULHU: 3054 return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL); 3055 case ISD::AND: 3056 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL, 3057 RISCVISD::AND_VL); 3058 case ISD::OR: 3059 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL, 3060 RISCVISD::OR_VL); 3061 case ISD::XOR: 3062 return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL, 3063 RISCVISD::XOR_VL); 3064 case ISD::SDIV: 3065 return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL); 3066 case ISD::SREM: 3067 return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL); 3068 case ISD::UDIV: 3069 return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL); 3070 case ISD::UREM: 3071 return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); 3072 case ISD::SHL: 3073 case ISD::SRA: 3074 case ISD::SRL: 3075 if (Op.getSimpleValueType().isFixedLengthVector()) 3076 return lowerFixedLengthVectorShiftToRVV(Op, DAG); 3077 // This can be called for an i32 shift amount that needs to be promoted. 3078 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && 3079 "Unexpected custom legalisation"); 3080 return SDValue(); 3081 case ISD::SADDSAT: 3082 return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL); 3083 case ISD::UADDSAT: 3084 return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL); 3085 case ISD::SSUBSAT: 3086 return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL); 3087 case ISD::USUBSAT: 3088 return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL); 3089 case ISD::FADD: 3090 return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); 3091 case ISD::FSUB: 3092 return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL); 3093 case ISD::FMUL: 3094 return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL); 3095 case ISD::FDIV: 3096 return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL); 3097 case ISD::FNEG: 3098 return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL); 3099 case ISD::FABS: 3100 return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL); 3101 case ISD::FSQRT: 3102 return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL); 3103 case ISD::FMA: 3104 return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL); 3105 case ISD::SMIN: 3106 return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL); 3107 case ISD::SMAX: 3108 return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL); 3109 case ISD::UMIN: 3110 return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL); 3111 case ISD::UMAX: 3112 return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL); 3113 case ISD::FMINNUM: 3114 return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL); 3115 case ISD::FMAXNUM: 3116 return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL); 3117 case ISD::ABS: 3118 return lowerABS(Op, DAG); 3119 case ISD::CTLZ_ZERO_UNDEF: 3120 case ISD::CTTZ_ZERO_UNDEF: 3121 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG); 3122 case ISD::VSELECT: 3123 return lowerFixedLengthVectorSelectToRVV(Op, DAG); 3124 case ISD::FCOPYSIGN: 3125 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG); 3126 case ISD::MGATHER: 3127 case ISD::VP_GATHER: 3128 return lowerMaskedGather(Op, DAG); 3129 case ISD::MSCATTER: 3130 case ISD::VP_SCATTER: 3131 return lowerMaskedScatter(Op, DAG); 3132 case ISD::FLT_ROUNDS_: 3133 return lowerGET_ROUNDING(Op, DAG); 3134 case ISD::SET_ROUNDING: 3135 return lowerSET_ROUNDING(Op, DAG); 3136 case ISD::VP_ADD: 3137 return lowerVPOp(Op, DAG, RISCVISD::ADD_VL); 3138 case ISD::VP_SUB: 3139 return lowerVPOp(Op, DAG, RISCVISD::SUB_VL); 3140 case ISD::VP_MUL: 3141 return lowerVPOp(Op, DAG, RISCVISD::MUL_VL); 3142 case ISD::VP_SDIV: 3143 return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL); 3144 case ISD::VP_UDIV: 3145 return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL); 3146 case ISD::VP_SREM: 3147 return lowerVPOp(Op, DAG, RISCVISD::SREM_VL); 3148 case ISD::VP_UREM: 3149 return lowerVPOp(Op, DAG, RISCVISD::UREM_VL); 3150 case ISD::VP_AND: 3151 return lowerVPOp(Op, DAG, RISCVISD::AND_VL); 3152 case ISD::VP_OR: 3153 return lowerVPOp(Op, DAG, RISCVISD::OR_VL); 3154 case ISD::VP_XOR: 3155 return lowerVPOp(Op, DAG, RISCVISD::XOR_VL); 3156 case ISD::VP_ASHR: 3157 return lowerVPOp(Op, DAG, RISCVISD::SRA_VL); 3158 case ISD::VP_LSHR: 3159 return lowerVPOp(Op, DAG, RISCVISD::SRL_VL); 3160 case ISD::VP_SHL: 3161 return lowerVPOp(Op, DAG, RISCVISD::SHL_VL); 3162 case ISD::VP_FADD: 3163 return lowerVPOp(Op, DAG, RISCVISD::FADD_VL); 3164 case ISD::VP_FSUB: 3165 return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL); 3166 case ISD::VP_FMUL: 3167 return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL); 3168 case ISD::VP_FDIV: 3169 return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL); 3170 } 3171 } 3172 3173 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 3174 SelectionDAG &DAG, unsigned Flags) { 3175 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 3176 } 3177 3178 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 3179 SelectionDAG &DAG, unsigned Flags) { 3180 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 3181 Flags); 3182 } 3183 3184 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 3185 SelectionDAG &DAG, unsigned Flags) { 3186 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 3187 N->getOffset(), Flags); 3188 } 3189 3190 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, 3191 SelectionDAG &DAG, unsigned Flags) { 3192 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags); 3193 } 3194 3195 template <class NodeTy> 3196 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 3197 bool IsLocal) const { 3198 SDLoc DL(N); 3199 EVT Ty = getPointerTy(DAG.getDataLayout()); 3200 3201 if (isPositionIndependent()) { 3202 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 3203 if (IsLocal) 3204 // Use PC-relative addressing to access the symbol. This generates the 3205 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 3206 // %pcrel_lo(auipc)). 3207 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 3208 3209 // Use PC-relative addressing to access the GOT for this symbol, then load 3210 // the address from the GOT. This generates the pattern (PseudoLA sym), 3211 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 3212 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 3213 } 3214 3215 switch (getTargetMachine().getCodeModel()) { 3216 default: 3217 report_fatal_error("Unsupported code model for lowering"); 3218 case CodeModel::Small: { 3219 // Generate a sequence for accessing addresses within the first 2 GiB of 3220 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 3221 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 3222 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 3223 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 3224 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 3225 } 3226 case CodeModel::Medium: { 3227 // Generate a sequence for accessing addresses within any 2GiB range within 3228 // the address space. This generates the pattern (PseudoLLA sym), which 3229 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 3230 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 3231 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 3232 } 3233 } 3234 } 3235 3236 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 3237 SelectionDAG &DAG) const { 3238 SDLoc DL(Op); 3239 EVT Ty = Op.getValueType(); 3240 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 3241 int64_t Offset = N->getOffset(); 3242 MVT XLenVT = Subtarget.getXLenVT(); 3243 3244 const GlobalValue *GV = N->getGlobal(); 3245 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 3246 SDValue Addr = getAddr(N, DAG, IsLocal); 3247 3248 // In order to maximise the opportunity for common subexpression elimination, 3249 // emit a separate ADD node for the global address offset instead of folding 3250 // it in the global address node. Later peephole optimisations may choose to 3251 // fold it back in when profitable. 3252 if (Offset != 0) 3253 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 3254 DAG.getConstant(Offset, DL, XLenVT)); 3255 return Addr; 3256 } 3257 3258 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 3259 SelectionDAG &DAG) const { 3260 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 3261 3262 return getAddr(N, DAG); 3263 } 3264 3265 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 3266 SelectionDAG &DAG) const { 3267 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 3268 3269 return getAddr(N, DAG); 3270 } 3271 3272 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op, 3273 SelectionDAG &DAG) const { 3274 JumpTableSDNode *N = cast<JumpTableSDNode>(Op); 3275 3276 return getAddr(N, DAG); 3277 } 3278 3279 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 3280 SelectionDAG &DAG, 3281 bool UseGOT) const { 3282 SDLoc DL(N); 3283 EVT Ty = getPointerTy(DAG.getDataLayout()); 3284 const GlobalValue *GV = N->getGlobal(); 3285 MVT XLenVT = Subtarget.getXLenVT(); 3286 3287 if (UseGOT) { 3288 // Use PC-relative addressing to access the GOT for this TLS symbol, then 3289 // load the address from the GOT and add the thread pointer. This generates 3290 // the pattern (PseudoLA_TLS_IE sym), which expands to 3291 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 3292 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 3293 SDValue Load = 3294 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 3295 3296 // Add the thread pointer. 3297 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 3298 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 3299 } 3300 3301 // Generate a sequence for accessing the address relative to the thread 3302 // pointer, with the appropriate adjustment for the thread pointer offset. 3303 // This generates the pattern 3304 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 3305 SDValue AddrHi = 3306 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 3307 SDValue AddrAdd = 3308 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 3309 SDValue AddrLo = 3310 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 3311 3312 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 3313 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 3314 SDValue MNAdd = SDValue( 3315 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 3316 0); 3317 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 3318 } 3319 3320 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 3321 SelectionDAG &DAG) const { 3322 SDLoc DL(N); 3323 EVT Ty = getPointerTy(DAG.getDataLayout()); 3324 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 3325 const GlobalValue *GV = N->getGlobal(); 3326 3327 // Use a PC-relative addressing mode to access the global dynamic GOT address. 3328 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 3329 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 3330 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 3331 SDValue Load = 3332 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 3333 3334 // Prepare argument list to generate call. 3335 ArgListTy Args; 3336 ArgListEntry Entry; 3337 Entry.Node = Load; 3338 Entry.Ty = CallTy; 3339 Args.push_back(Entry); 3340 3341 // Setup call to __tls_get_addr. 3342 TargetLowering::CallLoweringInfo CLI(DAG); 3343 CLI.setDebugLoc(DL) 3344 .setChain(DAG.getEntryNode()) 3345 .setLibCallee(CallingConv::C, CallTy, 3346 DAG.getExternalSymbol("__tls_get_addr", Ty), 3347 std::move(Args)); 3348 3349 return LowerCallTo(CLI).first; 3350 } 3351 3352 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 3353 SelectionDAG &DAG) const { 3354 SDLoc DL(Op); 3355 EVT Ty = Op.getValueType(); 3356 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 3357 int64_t Offset = N->getOffset(); 3358 MVT XLenVT = Subtarget.getXLenVT(); 3359 3360 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 3361 3362 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3363 CallingConv::GHC) 3364 report_fatal_error("In GHC calling convention TLS is not supported"); 3365 3366 SDValue Addr; 3367 switch (Model) { 3368 case TLSModel::LocalExec: 3369 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 3370 break; 3371 case TLSModel::InitialExec: 3372 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 3373 break; 3374 case TLSModel::LocalDynamic: 3375 case TLSModel::GeneralDynamic: 3376 Addr = getDynamicTLSAddr(N, DAG); 3377 break; 3378 } 3379 3380 // In order to maximise the opportunity for common subexpression elimination, 3381 // emit a separate ADD node for the global address offset instead of folding 3382 // it in the global address node. Later peephole optimisations may choose to 3383 // fold it back in when profitable. 3384 if (Offset != 0) 3385 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 3386 DAG.getConstant(Offset, DL, XLenVT)); 3387 return Addr; 3388 } 3389 3390 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 3391 SDValue CondV = Op.getOperand(0); 3392 SDValue TrueV = Op.getOperand(1); 3393 SDValue FalseV = Op.getOperand(2); 3394 SDLoc DL(Op); 3395 MVT VT = Op.getSimpleValueType(); 3396 MVT XLenVT = Subtarget.getXLenVT(); 3397 3398 // Lower vector SELECTs to VSELECTs by splatting the condition. 3399 if (VT.isVector()) { 3400 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1); 3401 SDValue CondSplat = VT.isScalableVector() 3402 ? DAG.getSplatVector(SplatCondVT, DL, CondV) 3403 : DAG.getSplatBuildVector(SplatCondVT, DL, CondV); 3404 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); 3405 } 3406 3407 // If the result type is XLenVT and CondV is the output of a SETCC node 3408 // which also operated on XLenVT inputs, then merge the SETCC node into the 3409 // lowered RISCVISD::SELECT_CC to take advantage of the integer 3410 // compare+branch instructions. i.e.: 3411 // (select (setcc lhs, rhs, cc), truev, falsev) 3412 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 3413 if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC && 3414 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 3415 SDValue LHS = CondV.getOperand(0); 3416 SDValue RHS = CondV.getOperand(1); 3417 const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 3418 ISD::CondCode CCVal = CC->get(); 3419 3420 // Special case for a select of 2 constants that have a diffence of 1. 3421 // Normally this is done by DAGCombine, but if the select is introduced by 3422 // type legalization or op legalization, we miss it. Restricting to SETLT 3423 // case for now because that is what signed saturating add/sub need. 3424 // FIXME: We don't need the condition to be SETLT or even a SETCC, 3425 // but we would probably want to swap the true/false values if the condition 3426 // is SETGE/SETLE to avoid an XORI. 3427 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && 3428 CCVal == ISD::SETLT) { 3429 const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue(); 3430 const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue(); 3431 if (TrueVal - 1 == FalseVal) 3432 return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV); 3433 if (TrueVal + 1 == FalseVal) 3434 return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV); 3435 } 3436 3437 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 3438 3439 SDValue TargetCC = DAG.getCondCode(CCVal); 3440 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 3441 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 3442 } 3443 3444 // Otherwise: 3445 // (select condv, truev, falsev) 3446 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 3447 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 3448 SDValue SetNE = DAG.getCondCode(ISD::SETNE); 3449 3450 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 3451 3452 return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops); 3453 } 3454 3455 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 3456 SDValue CondV = Op.getOperand(1); 3457 SDLoc DL(Op); 3458 MVT XLenVT = Subtarget.getXLenVT(); 3459 3460 if (CondV.getOpcode() == ISD::SETCC && 3461 CondV.getOperand(0).getValueType() == XLenVT) { 3462 SDValue LHS = CondV.getOperand(0); 3463 SDValue RHS = CondV.getOperand(1); 3464 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get(); 3465 3466 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 3467 3468 SDValue TargetCC = DAG.getCondCode(CCVal); 3469 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 3470 LHS, RHS, TargetCC, Op.getOperand(2)); 3471 } 3472 3473 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0), 3474 CondV, DAG.getConstant(0, DL, XLenVT), 3475 DAG.getCondCode(ISD::SETNE), Op.getOperand(2)); 3476 } 3477 3478 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 3479 MachineFunction &MF = DAG.getMachineFunction(); 3480 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 3481 3482 SDLoc DL(Op); 3483 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 3484 getPointerTy(MF.getDataLayout())); 3485 3486 // vastart just stores the address of the VarArgsFrameIndex slot into the 3487 // memory location argument. 3488 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3489 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), 3490 MachinePointerInfo(SV)); 3491 } 3492 3493 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, 3494 SelectionDAG &DAG) const { 3495 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 3496 MachineFunction &MF = DAG.getMachineFunction(); 3497 MachineFrameInfo &MFI = MF.getFrameInfo(); 3498 MFI.setFrameAddressIsTaken(true); 3499 Register FrameReg = RI.getFrameRegister(MF); 3500 int XLenInBytes = Subtarget.getXLen() / 8; 3501 3502 EVT VT = Op.getValueType(); 3503 SDLoc DL(Op); 3504 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); 3505 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3506 while (Depth--) { 3507 int Offset = -(XLenInBytes * 2); 3508 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, 3509 DAG.getIntPtrConstant(Offset, DL)); 3510 FrameAddr = 3511 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); 3512 } 3513 return FrameAddr; 3514 } 3515 3516 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, 3517 SelectionDAG &DAG) const { 3518 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); 3519 MachineFunction &MF = DAG.getMachineFunction(); 3520 MachineFrameInfo &MFI = MF.getFrameInfo(); 3521 MFI.setReturnAddressIsTaken(true); 3522 MVT XLenVT = Subtarget.getXLenVT(); 3523 int XLenInBytes = Subtarget.getXLen() / 8; 3524 3525 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 3526 return SDValue(); 3527 3528 EVT VT = Op.getValueType(); 3529 SDLoc DL(Op); 3530 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3531 if (Depth) { 3532 int Off = -XLenInBytes; 3533 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 3534 SDValue Offset = DAG.getConstant(Off, DL, VT); 3535 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 3536 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), 3537 MachinePointerInfo()); 3538 } 3539 3540 // Return the value of the return address register, marking it an implicit 3541 // live-in. 3542 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 3543 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 3544 } 3545 3546 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 3547 SelectionDAG &DAG) const { 3548 SDLoc DL(Op); 3549 SDValue Lo = Op.getOperand(0); 3550 SDValue Hi = Op.getOperand(1); 3551 SDValue Shamt = Op.getOperand(2); 3552 EVT VT = Lo.getValueType(); 3553 3554 // if Shamt-XLEN < 0: // Shamt < XLEN 3555 // Lo = Lo << Shamt 3556 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 3557 // else: 3558 // Lo = 0 3559 // Hi = Lo << (Shamt-XLEN) 3560 3561 SDValue Zero = DAG.getConstant(0, DL, VT); 3562 SDValue One = DAG.getConstant(1, DL, VT); 3563 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 3564 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 3565 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 3566 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 3567 3568 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 3569 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 3570 SDValue ShiftRightLo = 3571 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 3572 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 3573 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 3574 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 3575 3576 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 3577 3578 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 3579 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 3580 3581 SDValue Parts[2] = {Lo, Hi}; 3582 return DAG.getMergeValues(Parts, DL); 3583 } 3584 3585 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 3586 bool IsSRA) const { 3587 SDLoc DL(Op); 3588 SDValue Lo = Op.getOperand(0); 3589 SDValue Hi = Op.getOperand(1); 3590 SDValue Shamt = Op.getOperand(2); 3591 EVT VT = Lo.getValueType(); 3592 3593 // SRA expansion: 3594 // if Shamt-XLEN < 0: // Shamt < XLEN 3595 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 3596 // Hi = Hi >>s Shamt 3597 // else: 3598 // Lo = Hi >>s (Shamt-XLEN); 3599 // Hi = Hi >>s (XLEN-1) 3600 // 3601 // SRL expansion: 3602 // if Shamt-XLEN < 0: // Shamt < XLEN 3603 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 3604 // Hi = Hi >>u Shamt 3605 // else: 3606 // Lo = Hi >>u (Shamt-XLEN); 3607 // Hi = 0; 3608 3609 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 3610 3611 SDValue Zero = DAG.getConstant(0, DL, VT); 3612 SDValue One = DAG.getConstant(1, DL, VT); 3613 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 3614 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 3615 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 3616 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 3617 3618 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 3619 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 3620 SDValue ShiftLeftHi = 3621 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 3622 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 3623 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 3624 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 3625 SDValue HiFalse = 3626 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; 3627 3628 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 3629 3630 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); 3631 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 3632 3633 SDValue Parts[2] = {Lo, Hi}; 3634 return DAG.getMergeValues(Parts, DL); 3635 } 3636 3637 // Lower splats of i1 types to SETCC. For each mask vector type, we have a 3638 // legal equivalently-sized i8 type, so we can use that as a go-between. 3639 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op, 3640 SelectionDAG &DAG) const { 3641 SDLoc DL(Op); 3642 MVT VT = Op.getSimpleValueType(); 3643 SDValue SplatVal = Op.getOperand(0); 3644 // All-zeros or all-ones splats are handled specially. 3645 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) { 3646 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 3647 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL); 3648 } 3649 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) { 3650 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second; 3651 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL); 3652 } 3653 MVT XLenVT = Subtarget.getXLenVT(); 3654 assert(SplatVal.getValueType() == XLenVT && 3655 "Unexpected type for i1 splat value"); 3656 MVT InterVT = VT.changeVectorElementType(MVT::i8); 3657 SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal, 3658 DAG.getConstant(1, DL, XLenVT)); 3659 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal); 3660 SDValue Zero = DAG.getConstant(0, DL, InterVT); 3661 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE); 3662 } 3663 3664 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is 3665 // illegal (currently only vXi64 RV32). 3666 // FIXME: We could also catch non-constant sign-extended i32 values and lower 3667 // them to SPLAT_VECTOR_I64 3668 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op, 3669 SelectionDAG &DAG) const { 3670 SDLoc DL(Op); 3671 MVT VecVT = Op.getSimpleValueType(); 3672 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 && 3673 "Unexpected SPLAT_VECTOR_PARTS lowering"); 3674 3675 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!"); 3676 SDValue Lo = Op.getOperand(0); 3677 SDValue Hi = Op.getOperand(1); 3678 3679 if (VecVT.isFixedLengthVector()) { 3680 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 3681 SDLoc DL(Op); 3682 SDValue Mask, VL; 3683 std::tie(Mask, VL) = 3684 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 3685 3686 SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG); 3687 return convertFromScalableVector(VecVT, Res, DAG, Subtarget); 3688 } 3689 3690 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) { 3691 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue(); 3692 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue(); 3693 // If Hi constant is all the same sign bit as Lo, lower this as a custom 3694 // node in order to try and match RVV vector/scalar instructions. 3695 if ((LoC >> 31) == HiC) 3696 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 3697 } 3698 3699 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended. 3700 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo && 3701 isa<ConstantSDNode>(Hi.getOperand(1)) && 3702 Hi.getConstantOperandVal(1) == 31) 3703 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo); 3704 3705 // Fall back to use a stack store and stride x0 vector load. Use X0 as VL. 3706 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi, 3707 DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64)); 3708 } 3709 3710 // Custom-lower extensions from mask vectors by using a vselect either with 1 3711 // for zero/any-extension or -1 for sign-extension: 3712 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) 3713 // Note that any-extension is lowered identically to zero-extension. 3714 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, 3715 int64_t ExtTrueVal) const { 3716 SDLoc DL(Op); 3717 MVT VecVT = Op.getSimpleValueType(); 3718 SDValue Src = Op.getOperand(0); 3719 // Only custom-lower extensions from mask types 3720 assert(Src.getValueType().isVector() && 3721 Src.getValueType().getVectorElementType() == MVT::i1); 3722 3723 MVT XLenVT = Subtarget.getXLenVT(); 3724 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT); 3725 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT); 3726 3727 if (VecVT.isScalableVector()) { 3728 // Be careful not to introduce illegal scalar types at this stage, and be 3729 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is 3730 // illegal and must be expanded. Since we know that the constants are 3731 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly. 3732 bool IsRV32E64 = 3733 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64; 3734 3735 if (!IsRV32E64) { 3736 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero); 3737 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal); 3738 } else { 3739 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero); 3740 SplatTrueVal = 3741 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal); 3742 } 3743 3744 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero); 3745 } 3746 3747 MVT ContainerVT = getContainerForFixedLengthVector(VecVT); 3748 MVT I1ContainerVT = 3749 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3750 3751 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget); 3752 3753 SDValue Mask, VL; 3754 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 3755 3756 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL); 3757 SplatTrueVal = 3758 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL); 3759 SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, 3760 SplatTrueVal, SplatZero, VL); 3761 3762 return convertFromScalableVector(VecVT, Select, DAG, Subtarget); 3763 } 3764 3765 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV( 3766 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const { 3767 MVT ExtVT = Op.getSimpleValueType(); 3768 // Only custom-lower extensions from fixed-length vector types. 3769 if (!ExtVT.isFixedLengthVector()) 3770 return Op; 3771 MVT VT = Op.getOperand(0).getSimpleValueType(); 3772 // Grab the canonical container type for the extended type. Infer the smaller 3773 // type from that to ensure the same number of vector elements, as we know 3774 // the LMUL will be sufficient to hold the smaller type. 3775 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT); 3776 // Get the extended container type manually to ensure the same number of 3777 // vector elements between source and dest. 3778 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 3779 ContainerExtVT.getVectorElementCount()); 3780 3781 SDValue Op1 = 3782 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 3783 3784 SDLoc DL(Op); 3785 SDValue Mask, VL; 3786 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 3787 3788 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL); 3789 3790 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget); 3791 } 3792 3793 // Custom-lower truncations from vectors to mask vectors by using a mask and a 3794 // setcc operation: 3795 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne) 3796 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op, 3797 SelectionDAG &DAG) const { 3798 SDLoc DL(Op); 3799 EVT MaskVT = Op.getValueType(); 3800 // Only expect to custom-lower truncations to mask types 3801 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 && 3802 "Unexpected type for vector mask lowering"); 3803 SDValue Src = Op.getOperand(0); 3804 MVT VecVT = Src.getSimpleValueType(); 3805 3806 // If this is a fixed vector, we need to convert it to a scalable vector. 3807 MVT ContainerVT = VecVT; 3808 if (VecVT.isFixedLengthVector()) { 3809 ContainerVT = getContainerForFixedLengthVector(VecVT); 3810 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget); 3811 } 3812 3813 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT()); 3814 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT()); 3815 3816 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne); 3817 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero); 3818 3819 if (VecVT.isScalableVector()) { 3820 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne); 3821 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE); 3822 } 3823 3824 SDValue Mask, VL; 3825 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 3826 3827 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); 3828 SDValue Trunc = 3829 DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL); 3830 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero, 3831 DAG.getCondCode(ISD::SETNE), Mask, VL); 3832 return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget); 3833 } 3834 3835 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the 3836 // first position of a vector, and that vector is slid up to the insert index. 3837 // By limiting the active vector length to index+1 and merging with the 3838 // original vector (with an undisturbed tail policy for elements >= VL), we 3839 // achieve the desired result of leaving all elements untouched except the one 3840 // at VL-1, which is replaced with the desired value. 3841 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 3842 SelectionDAG &DAG) const { 3843 SDLoc DL(Op); 3844 MVT VecVT = Op.getSimpleValueType(); 3845 SDValue Vec = Op.getOperand(0); 3846 SDValue Val = Op.getOperand(1); 3847 SDValue Idx = Op.getOperand(2); 3848 3849 if (VecVT.getVectorElementType() == MVT::i1) { 3850 // FIXME: For now we just promote to an i8 vector and insert into that, 3851 // but this is probably not optimal. 3852 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 3853 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 3854 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx); 3855 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec); 3856 } 3857 3858 MVT ContainerVT = VecVT; 3859 // If the operand is a fixed-length vector, convert to a scalable one. 3860 if (VecVT.isFixedLengthVector()) { 3861 ContainerVT = getContainerForFixedLengthVector(VecVT); 3862 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3863 } 3864 3865 MVT XLenVT = Subtarget.getXLenVT(); 3866 3867 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 3868 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64; 3869 // Even i64-element vectors on RV32 can be lowered without scalar 3870 // legalization if the most-significant 32 bits of the value are not affected 3871 // by the sign-extension of the lower 32 bits. 3872 // TODO: We could also catch sign extensions of a 32-bit value. 3873 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) { 3874 const auto *CVal = cast<ConstantSDNode>(Val); 3875 if (isInt<32>(CVal->getSExtValue())) { 3876 IsLegalInsert = true; 3877 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 3878 } 3879 } 3880 3881 SDValue Mask, VL; 3882 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 3883 3884 SDValue ValInVec; 3885 3886 if (IsLegalInsert) { 3887 unsigned Opc = 3888 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL; 3889 if (isNullConstant(Idx)) { 3890 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL); 3891 if (!VecVT.isFixedLengthVector()) 3892 return Vec; 3893 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget); 3894 } 3895 ValInVec = 3896 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL); 3897 } else { 3898 // On RV32, i64-element vectors must be specially handled to place the 3899 // value at element 0, by using two vslide1up instructions in sequence on 3900 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for 3901 // this. 3902 SDValue One = DAG.getConstant(1, DL, XLenVT); 3903 SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero); 3904 SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One); 3905 MVT I32ContainerVT = 3906 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2); 3907 SDValue I32Mask = 3908 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first; 3909 // Limit the active VL to two. 3910 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT); 3911 // Note: We can't pass a UNDEF to the first VSLIDE1UP_VL since an untied 3912 // undef doesn't obey the earlyclobber constraint. Just splat a zero value. 3913 ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero, 3914 InsertI64VL); 3915 // First slide in the hi value, then the lo in underneath it. 3916 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, 3917 ValHi, I32Mask, InsertI64VL); 3918 ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec, 3919 ValLo, I32Mask, InsertI64VL); 3920 // Bitcast back to the right container type. 3921 ValInVec = DAG.getBitcast(ContainerVT, ValInVec); 3922 } 3923 3924 // Now that the value is in a vector, slide it into position. 3925 SDValue InsertVL = 3926 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT)); 3927 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, 3928 ValInVec, Idx, Mask, InsertVL); 3929 if (!VecVT.isFixedLengthVector()) 3930 return Slideup; 3931 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 3932 } 3933 3934 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then 3935 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer 3936 // types this is done using VMV_X_S to allow us to glean information about the 3937 // sign bits of the result. 3938 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 3939 SelectionDAG &DAG) const { 3940 SDLoc DL(Op); 3941 SDValue Idx = Op.getOperand(1); 3942 SDValue Vec = Op.getOperand(0); 3943 EVT EltVT = Op.getValueType(); 3944 MVT VecVT = Vec.getSimpleValueType(); 3945 MVT XLenVT = Subtarget.getXLenVT(); 3946 3947 if (VecVT.getVectorElementType() == MVT::i1) { 3948 // FIXME: For now we just promote to an i8 vector and extract from that, 3949 // but this is probably not optimal. 3950 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount()); 3951 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec); 3952 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx); 3953 } 3954 3955 // If this is a fixed vector, we need to convert it to a scalable vector. 3956 MVT ContainerVT = VecVT; 3957 if (VecVT.isFixedLengthVector()) { 3958 ContainerVT = getContainerForFixedLengthVector(VecVT); 3959 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 3960 } 3961 3962 // If the index is 0, the vector is already in the right position. 3963 if (!isNullConstant(Idx)) { 3964 // Use a VL of 1 to avoid processing more elements than we need. 3965 SDValue VL = DAG.getConstant(1, DL, XLenVT); 3966 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 3967 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 3968 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 3969 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 3970 } 3971 3972 if (!EltVT.isInteger()) { 3973 // Floating-point extracts are handled in TableGen. 3974 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, 3975 DAG.getConstant(0, DL, XLenVT)); 3976 } 3977 3978 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 3979 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0); 3980 } 3981 3982 // Some RVV intrinsics may claim that they want an integer operand to be 3983 // promoted or expanded. 3984 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG, 3985 const RISCVSubtarget &Subtarget) { 3986 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 3987 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) && 3988 "Unexpected opcode"); 3989 3990 if (!Subtarget.hasVInstructions()) 3991 return SDValue(); 3992 3993 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN; 3994 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0); 3995 SDLoc DL(Op); 3996 3997 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II = 3998 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo); 3999 if (!II || !II->SplatOperand) 4000 return SDValue(); 4001 4002 unsigned SplatOp = II->SplatOperand + HasChain; 4003 assert(SplatOp < Op.getNumOperands()); 4004 4005 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end()); 4006 SDValue &ScalarOp = Operands[SplatOp]; 4007 MVT OpVT = ScalarOp.getSimpleValueType(); 4008 MVT XLenVT = Subtarget.getXLenVT(); 4009 4010 // If this isn't a scalar, or its type is XLenVT we're done. 4011 if (!OpVT.isScalarInteger() || OpVT == XLenVT) 4012 return SDValue(); 4013 4014 // Simplest case is that the operand needs to be promoted to XLenVT. 4015 if (OpVT.bitsLT(XLenVT)) { 4016 // If the operand is a constant, sign extend to increase our chances 4017 // of being able to use a .vi instruction. ANY_EXTEND would become a 4018 // a zero extend and the simm5 check in isel would fail. 4019 // FIXME: Should we ignore the upper bits in isel instead? 4020 unsigned ExtOpc = 4021 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND; 4022 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp); 4023 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 4024 } 4025 4026 // Use the previous operand to get the vXi64 VT. The result might be a mask 4027 // VT for compares. Using the previous operand assumes that the previous 4028 // operand will never have a smaller element size than a scalar operand and 4029 // that a widening operation never uses SEW=64. 4030 // NOTE: If this fails the below assert, we can probably just find the 4031 // element count from any operand or result and use it to construct the VT. 4032 assert(II->SplatOperand > 1 && "Unexpected splat operand!"); 4033 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType(); 4034 4035 // The more complex case is when the scalar is larger than XLenVT. 4036 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 && 4037 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!"); 4038 4039 // If this is a sign-extended 32-bit constant, we can truncate it and rely 4040 // on the instruction to sign-extend since SEW>XLEN. 4041 if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) { 4042 if (isInt<32>(CVal->getSExtValue())) { 4043 ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32); 4044 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 4045 } 4046 } 4047 4048 // We need to convert the scalar to a splat vector. 4049 // FIXME: Can we implicitly truncate the scalar if it is known to 4050 // be sign extended? 4051 // VL should be the last operand. 4052 SDValue VL = Op.getOperand(Op.getNumOperands() - 1); 4053 assert(VL.getValueType() == XLenVT); 4054 ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG); 4055 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands); 4056 } 4057 4058 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 4059 SelectionDAG &DAG) const { 4060 unsigned IntNo = Op.getConstantOperandVal(0); 4061 SDLoc DL(Op); 4062 MVT XLenVT = Subtarget.getXLenVT(); 4063 4064 switch (IntNo) { 4065 default: 4066 break; // Don't custom lower most intrinsics. 4067 case Intrinsic::thread_pointer: { 4068 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4069 return DAG.getRegister(RISCV::X4, PtrVT); 4070 } 4071 case Intrinsic::riscv_orc_b: 4072 // Lower to the GORCI encoding for orc.b. 4073 return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1), 4074 DAG.getConstant(7, DL, XLenVT)); 4075 case Intrinsic::riscv_grev: 4076 case Intrinsic::riscv_gorc: { 4077 unsigned Opc = 4078 IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC; 4079 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); 4080 } 4081 case Intrinsic::riscv_shfl: 4082 case Intrinsic::riscv_unshfl: { 4083 unsigned Opc = 4084 IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL; 4085 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); 4086 } 4087 case Intrinsic::riscv_bcompress: 4088 case Intrinsic::riscv_bdecompress: { 4089 unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS 4090 : RISCVISD::BDECOMPRESS; 4091 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); 4092 } 4093 case Intrinsic::riscv_vmv_x_s: 4094 assert(Op.getValueType() == XLenVT && "Unexpected VT!"); 4095 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), 4096 Op.getOperand(1)); 4097 case Intrinsic::riscv_vmv_v_x: 4098 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2), 4099 Op.getSimpleValueType(), DL, DAG, Subtarget); 4100 case Intrinsic::riscv_vfmv_v_f: 4101 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(), 4102 Op.getOperand(1), Op.getOperand(2)); 4103 case Intrinsic::riscv_vmv_s_x: { 4104 SDValue Scalar = Op.getOperand(2); 4105 4106 if (Scalar.getValueType().bitsLE(XLenVT)) { 4107 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar); 4108 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(), 4109 Op.getOperand(1), Scalar, Op.getOperand(3)); 4110 } 4111 4112 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!"); 4113 4114 // This is an i64 value that lives in two scalar registers. We have to 4115 // insert this in a convoluted way. First we build vXi64 splat containing 4116 // the/ two values that we assemble using some bit math. Next we'll use 4117 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask 4118 // to merge element 0 from our splat into the source vector. 4119 // FIXME: This is probably not the best way to do this, but it is 4120 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting 4121 // point. 4122 // sw lo, (a0) 4123 // sw hi, 4(a0) 4124 // vlse vX, (a0) 4125 // 4126 // vid.v vVid 4127 // vmseq.vx mMask, vVid, 0 4128 // vmerge.vvm vDest, vSrc, vVal, mMask 4129 MVT VT = Op.getSimpleValueType(); 4130 SDValue Vec = Op.getOperand(1); 4131 SDValue VL = Op.getOperand(3); 4132 4133 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG); 4134 SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, 4135 DAG.getConstant(0, DL, MVT::i32), VL); 4136 4137 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); 4138 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 4139 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 4140 SDValue SelectCond = 4141 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx, 4142 DAG.getCondCode(ISD::SETEQ), Mask, VL); 4143 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, 4144 Vec, VL); 4145 } 4146 case Intrinsic::riscv_vslide1up: 4147 case Intrinsic::riscv_vslide1down: 4148 case Intrinsic::riscv_vslide1up_mask: 4149 case Intrinsic::riscv_vslide1down_mask: { 4150 // We need to special case these when the scalar is larger than XLen. 4151 unsigned NumOps = Op.getNumOperands(); 4152 bool IsMasked = NumOps == 7; 4153 unsigned OpOffset = IsMasked ? 1 : 0; 4154 SDValue Scalar = Op.getOperand(2 + OpOffset); 4155 if (Scalar.getValueType().bitsLE(XLenVT)) 4156 break; 4157 4158 // Splatting a sign extended constant is fine. 4159 if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar)) 4160 if (isInt<32>(CVal->getSExtValue())) 4161 break; 4162 4163 MVT VT = Op.getSimpleValueType(); 4164 assert(VT.getVectorElementType() == MVT::i64 && 4165 Scalar.getValueType() == MVT::i64 && "Unexpected VTs"); 4166 4167 // Convert the vector source to the equivalent nxvXi32 vector. 4168 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2); 4169 SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset)); 4170 4171 SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 4172 DAG.getConstant(0, DL, XLenVT)); 4173 SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar, 4174 DAG.getConstant(1, DL, XLenVT)); 4175 4176 // Double the VL since we halved SEW. 4177 SDValue VL = Op.getOperand(NumOps - (1 + OpOffset)); 4178 SDValue I32VL = 4179 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); 4180 4181 MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount()); 4182 SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL); 4183 4184 // Shift the two scalar parts in using SEW=32 slide1up/slide1down 4185 // instructions. 4186 if (IntNo == Intrinsic::riscv_vslide1up || 4187 IntNo == Intrinsic::riscv_vslide1up_mask) { 4188 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi, 4189 I32Mask, I32VL); 4190 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo, 4191 I32Mask, I32VL); 4192 } else { 4193 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo, 4194 I32Mask, I32VL); 4195 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi, 4196 I32Mask, I32VL); 4197 } 4198 4199 // Convert back to nxvXi64. 4200 Vec = DAG.getBitcast(VT, Vec); 4201 4202 if (!IsMasked) 4203 return Vec; 4204 4205 // Apply mask after the operation. 4206 SDValue Mask = Op.getOperand(NumOps - 3); 4207 SDValue MaskedOff = Op.getOperand(1); 4208 return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL); 4209 } 4210 } 4211 4212 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 4213 } 4214 4215 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, 4216 SelectionDAG &DAG) const { 4217 unsigned IntNo = Op.getConstantOperandVal(1); 4218 switch (IntNo) { 4219 default: 4220 break; 4221 case Intrinsic::riscv_masked_strided_load: { 4222 SDLoc DL(Op); 4223 MVT XLenVT = Subtarget.getXLenVT(); 4224 4225 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 4226 // the selection of the masked intrinsics doesn't do this for us. 4227 SDValue Mask = Op.getOperand(5); 4228 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 4229 4230 MVT VT = Op->getSimpleValueType(0); 4231 MVT ContainerVT = getContainerForFixedLengthVector(VT); 4232 4233 SDValue PassThru = Op.getOperand(2); 4234 if (!IsUnmasked) { 4235 MVT MaskVT = 4236 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 4237 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 4238 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 4239 } 4240 4241 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 4242 4243 SDValue IntID = DAG.getTargetConstant( 4244 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL, 4245 XLenVT); 4246 4247 auto *Load = cast<MemIntrinsicSDNode>(Op); 4248 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID}; 4249 if (!IsUnmasked) 4250 Ops.push_back(PassThru); 4251 Ops.push_back(Op.getOperand(3)); // Ptr 4252 Ops.push_back(Op.getOperand(4)); // Stride 4253 if (!IsUnmasked) 4254 Ops.push_back(Mask); 4255 Ops.push_back(VL); 4256 if (!IsUnmasked) { 4257 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); 4258 Ops.push_back(Policy); 4259 } 4260 4261 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 4262 SDValue Result = 4263 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 4264 Load->getMemoryVT(), Load->getMemOperand()); 4265 SDValue Chain = Result.getValue(1); 4266 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 4267 return DAG.getMergeValues({Result, Chain}, DL); 4268 } 4269 } 4270 4271 return lowerVectorIntrinsicSplats(Op, DAG, Subtarget); 4272 } 4273 4274 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op, 4275 SelectionDAG &DAG) const { 4276 unsigned IntNo = Op.getConstantOperandVal(1); 4277 switch (IntNo) { 4278 default: 4279 break; 4280 case Intrinsic::riscv_masked_strided_store: { 4281 SDLoc DL(Op); 4282 MVT XLenVT = Subtarget.getXLenVT(); 4283 4284 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 4285 // the selection of the masked intrinsics doesn't do this for us. 4286 SDValue Mask = Op.getOperand(5); 4287 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 4288 4289 SDValue Val = Op.getOperand(2); 4290 MVT VT = Val.getSimpleValueType(); 4291 MVT ContainerVT = getContainerForFixedLengthVector(VT); 4292 4293 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 4294 if (!IsUnmasked) { 4295 MVT MaskVT = 4296 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 4297 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 4298 } 4299 4300 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); 4301 4302 SDValue IntID = DAG.getTargetConstant( 4303 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL, 4304 XLenVT); 4305 4306 auto *Store = cast<MemIntrinsicSDNode>(Op); 4307 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID}; 4308 Ops.push_back(Val); 4309 Ops.push_back(Op.getOperand(3)); // Ptr 4310 Ops.push_back(Op.getOperand(4)); // Stride 4311 if (!IsUnmasked) 4312 Ops.push_back(Mask); 4313 Ops.push_back(VL); 4314 4315 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(), 4316 Ops, Store->getMemoryVT(), 4317 Store->getMemOperand()); 4318 } 4319 } 4320 4321 return SDValue(); 4322 } 4323 4324 static MVT getLMUL1VT(MVT VT) { 4325 assert(VT.getVectorElementType().getSizeInBits() <= 64 && 4326 "Unexpected vector MVT"); 4327 return MVT::getScalableVectorVT( 4328 VT.getVectorElementType(), 4329 RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits()); 4330 } 4331 4332 static unsigned getRVVReductionOp(unsigned ISDOpcode) { 4333 switch (ISDOpcode) { 4334 default: 4335 llvm_unreachable("Unhandled reduction"); 4336 case ISD::VECREDUCE_ADD: 4337 return RISCVISD::VECREDUCE_ADD_VL; 4338 case ISD::VECREDUCE_UMAX: 4339 return RISCVISD::VECREDUCE_UMAX_VL; 4340 case ISD::VECREDUCE_SMAX: 4341 return RISCVISD::VECREDUCE_SMAX_VL; 4342 case ISD::VECREDUCE_UMIN: 4343 return RISCVISD::VECREDUCE_UMIN_VL; 4344 case ISD::VECREDUCE_SMIN: 4345 return RISCVISD::VECREDUCE_SMIN_VL; 4346 case ISD::VECREDUCE_AND: 4347 return RISCVISD::VECREDUCE_AND_VL; 4348 case ISD::VECREDUCE_OR: 4349 return RISCVISD::VECREDUCE_OR_VL; 4350 case ISD::VECREDUCE_XOR: 4351 return RISCVISD::VECREDUCE_XOR_VL; 4352 } 4353 } 4354 4355 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, 4356 SelectionDAG &DAG, 4357 bool IsVP) const { 4358 SDLoc DL(Op); 4359 SDValue Vec = Op.getOperand(IsVP ? 1 : 0); 4360 MVT VecVT = Vec.getSimpleValueType(); 4361 assert((Op.getOpcode() == ISD::VECREDUCE_AND || 4362 Op.getOpcode() == ISD::VECREDUCE_OR || 4363 Op.getOpcode() == ISD::VECREDUCE_XOR || 4364 Op.getOpcode() == ISD::VP_REDUCE_AND || 4365 Op.getOpcode() == ISD::VP_REDUCE_OR || 4366 Op.getOpcode() == ISD::VP_REDUCE_XOR) && 4367 "Unexpected reduction lowering"); 4368 4369 MVT XLenVT = Subtarget.getXLenVT(); 4370 assert(Op.getValueType() == XLenVT && 4371 "Expected reduction output to be legalized to XLenVT"); 4372 4373 MVT ContainerVT = VecVT; 4374 if (VecVT.isFixedLengthVector()) { 4375 ContainerVT = getContainerForFixedLengthVector(VecVT); 4376 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4377 } 4378 4379 SDValue Mask, VL; 4380 if (IsVP) { 4381 Mask = Op.getOperand(2); 4382 VL = Op.getOperand(3); 4383 } else { 4384 std::tie(Mask, VL) = 4385 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 4386 } 4387 4388 unsigned BaseOpc; 4389 ISD::CondCode CC; 4390 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 4391 4392 switch (Op.getOpcode()) { 4393 default: 4394 llvm_unreachable("Unhandled reduction"); 4395 case ISD::VECREDUCE_AND: 4396 case ISD::VP_REDUCE_AND: { 4397 // vcpop ~x == 0 4398 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); 4399 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); 4400 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 4401 CC = ISD::SETEQ; 4402 BaseOpc = ISD::AND; 4403 break; 4404 } 4405 case ISD::VECREDUCE_OR: 4406 case ISD::VP_REDUCE_OR: 4407 // vcpop x != 0 4408 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 4409 CC = ISD::SETNE; 4410 BaseOpc = ISD::OR; 4411 break; 4412 case ISD::VECREDUCE_XOR: 4413 case ISD::VP_REDUCE_XOR: { 4414 // ((vcpop x) & 1) != 0 4415 SDValue One = DAG.getConstant(1, DL, XLenVT); 4416 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); 4417 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); 4418 CC = ISD::SETNE; 4419 BaseOpc = ISD::XOR; 4420 break; 4421 } 4422 } 4423 4424 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC); 4425 4426 if (!IsVP) 4427 return SetCC; 4428 4429 // Now include the start value in the operation. 4430 // Note that we must return the start value when no elements are operated 4431 // upon. The vcpop instructions we've emitted in each case above will return 4432 // 0 for an inactive vector, and so we've already received the neutral value: 4433 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we 4434 // can simply include the start value. 4435 return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0)); 4436 } 4437 4438 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op, 4439 SelectionDAG &DAG) const { 4440 SDLoc DL(Op); 4441 SDValue Vec = Op.getOperand(0); 4442 EVT VecEVT = Vec.getValueType(); 4443 4444 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); 4445 4446 // Due to ordering in legalize types we may have a vector type that needs to 4447 // be split. Do that manually so we can get down to a legal type. 4448 while (getTypeAction(*DAG.getContext(), VecEVT) == 4449 TargetLowering::TypeSplitVector) { 4450 SDValue Lo, Hi; 4451 std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL); 4452 VecEVT = Lo.getValueType(); 4453 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi); 4454 } 4455 4456 // TODO: The type may need to be widened rather than split. Or widened before 4457 // it can be split. 4458 if (!isTypeLegal(VecEVT)) 4459 return SDValue(); 4460 4461 MVT VecVT = VecEVT.getSimpleVT(); 4462 MVT VecEltVT = VecVT.getVectorElementType(); 4463 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode()); 4464 4465 MVT ContainerVT = VecVT; 4466 if (VecVT.isFixedLengthVector()) { 4467 ContainerVT = getContainerForFixedLengthVector(VecVT); 4468 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4469 } 4470 4471 MVT M1VT = getLMUL1VT(ContainerVT); 4472 4473 SDValue Mask, VL; 4474 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 4475 4476 // FIXME: This is a VLMAX splat which might be too large and can prevent 4477 // vsetvli removal. 4478 SDValue NeutralElem = 4479 DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); 4480 SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem); 4481 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec, 4482 IdentitySplat, Mask, VL); 4483 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 4484 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 4485 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 4486 } 4487 4488 // Given a reduction op, this function returns the matching reduction opcode, 4489 // the vector SDValue and the scalar SDValue required to lower this to a 4490 // RISCVISD node. 4491 static std::tuple<unsigned, SDValue, SDValue> 4492 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { 4493 SDLoc DL(Op); 4494 auto Flags = Op->getFlags(); 4495 unsigned Opcode = Op.getOpcode(); 4496 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode); 4497 switch (Opcode) { 4498 default: 4499 llvm_unreachable("Unhandled reduction"); 4500 case ISD::VECREDUCE_FADD: 4501 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), 4502 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); 4503 case ISD::VECREDUCE_SEQ_FADD: 4504 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), 4505 Op.getOperand(0)); 4506 case ISD::VECREDUCE_FMIN: 4507 return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0), 4508 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); 4509 case ISD::VECREDUCE_FMAX: 4510 return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0), 4511 DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); 4512 } 4513 } 4514 4515 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, 4516 SelectionDAG &DAG) const { 4517 SDLoc DL(Op); 4518 MVT VecEltVT = Op.getSimpleValueType(); 4519 4520 unsigned RVVOpcode; 4521 SDValue VectorVal, ScalarVal; 4522 std::tie(RVVOpcode, VectorVal, ScalarVal) = 4523 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT); 4524 MVT VecVT = VectorVal.getSimpleValueType(); 4525 4526 MVT ContainerVT = VecVT; 4527 if (VecVT.isFixedLengthVector()) { 4528 ContainerVT = getContainerForFixedLengthVector(VecVT); 4529 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); 4530 } 4531 4532 MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); 4533 4534 SDValue Mask, VL; 4535 std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); 4536 4537 // FIXME: This is a VLMAX splat which might be too large and can prevent 4538 // vsetvli removal. 4539 SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); 4540 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), 4541 VectorVal, ScalarSplat, Mask, VL); 4542 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, 4543 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 4544 } 4545 4546 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) { 4547 switch (ISDOpcode) { 4548 default: 4549 llvm_unreachable("Unhandled reduction"); 4550 case ISD::VP_REDUCE_ADD: 4551 return RISCVISD::VECREDUCE_ADD_VL; 4552 case ISD::VP_REDUCE_UMAX: 4553 return RISCVISD::VECREDUCE_UMAX_VL; 4554 case ISD::VP_REDUCE_SMAX: 4555 return RISCVISD::VECREDUCE_SMAX_VL; 4556 case ISD::VP_REDUCE_UMIN: 4557 return RISCVISD::VECREDUCE_UMIN_VL; 4558 case ISD::VP_REDUCE_SMIN: 4559 return RISCVISD::VECREDUCE_SMIN_VL; 4560 case ISD::VP_REDUCE_AND: 4561 return RISCVISD::VECREDUCE_AND_VL; 4562 case ISD::VP_REDUCE_OR: 4563 return RISCVISD::VECREDUCE_OR_VL; 4564 case ISD::VP_REDUCE_XOR: 4565 return RISCVISD::VECREDUCE_XOR_VL; 4566 case ISD::VP_REDUCE_FADD: 4567 return RISCVISD::VECREDUCE_FADD_VL; 4568 case ISD::VP_REDUCE_SEQ_FADD: 4569 return RISCVISD::VECREDUCE_SEQ_FADD_VL; 4570 case ISD::VP_REDUCE_FMAX: 4571 return RISCVISD::VECREDUCE_FMAX_VL; 4572 case ISD::VP_REDUCE_FMIN: 4573 return RISCVISD::VECREDUCE_FMIN_VL; 4574 } 4575 } 4576 4577 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, 4578 SelectionDAG &DAG) const { 4579 SDLoc DL(Op); 4580 SDValue Vec = Op.getOperand(1); 4581 EVT VecEVT = Vec.getValueType(); 4582 4583 // TODO: The type may need to be widened rather than split. Or widened before 4584 // it can be split. 4585 if (!isTypeLegal(VecEVT)) 4586 return SDValue(); 4587 4588 MVT VecVT = VecEVT.getSimpleVT(); 4589 MVT VecEltVT = VecVT.getVectorElementType(); 4590 unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode()); 4591 4592 MVT ContainerVT = VecVT; 4593 if (VecVT.isFixedLengthVector()) { 4594 ContainerVT = getContainerForFixedLengthVector(VecVT); 4595 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4596 } 4597 4598 SDValue VL = Op.getOperand(3); 4599 SDValue Mask = Op.getOperand(2); 4600 4601 MVT M1VT = getLMUL1VT(ContainerVT); 4602 MVT XLenVT = Subtarget.getXLenVT(); 4603 MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT; 4604 4605 // FIXME: This is a VLMAX splat which might be too large and can prevent 4606 // vsetvli removal. 4607 SDValue StartSplat = DAG.getSplatVector(M1VT, DL, Op.getOperand(0)); 4608 SDValue Reduction = 4609 DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL); 4610 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, 4611 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 4612 if (!VecVT.isInteger()) 4613 return Elt0; 4614 return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); 4615 } 4616 4617 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, 4618 SelectionDAG &DAG) const { 4619 SDValue Vec = Op.getOperand(0); 4620 SDValue SubVec = Op.getOperand(1); 4621 MVT VecVT = Vec.getSimpleValueType(); 4622 MVT SubVecVT = SubVec.getSimpleValueType(); 4623 4624 SDLoc DL(Op); 4625 MVT XLenVT = Subtarget.getXLenVT(); 4626 unsigned OrigIdx = Op.getConstantOperandVal(2); 4627 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4628 4629 // We don't have the ability to slide mask vectors up indexed by their i1 4630 // elements; the smallest we can do is i8. Often we are able to bitcast to 4631 // equivalent i8 vectors. Note that when inserting a fixed-length vector 4632 // into a scalable one, we might not necessarily have enough scalable 4633 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. 4634 if (SubVecVT.getVectorElementType() == MVT::i1 && 4635 (OrigIdx != 0 || !Vec.isUndef())) { 4636 if (VecVT.getVectorMinNumElements() >= 8 && 4637 SubVecVT.getVectorMinNumElements() >= 8) { 4638 assert(OrigIdx % 8 == 0 && "Invalid index"); 4639 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 4640 SubVecVT.getVectorMinNumElements() % 8 == 0 && 4641 "Unexpected mask vector lowering"); 4642 OrigIdx /= 8; 4643 SubVecVT = 4644 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 4645 SubVecVT.isScalableVector()); 4646 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 4647 VecVT.isScalableVector()); 4648 Vec = DAG.getBitcast(VecVT, Vec); 4649 SubVec = DAG.getBitcast(SubVecVT, SubVec); 4650 } else { 4651 // We can't slide this mask vector up indexed by its i1 elements. 4652 // This poses a problem when we wish to insert a scalable vector which 4653 // can't be re-expressed as a larger type. Just choose the slow path and 4654 // extend to a larger type, then truncate back down. 4655 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 4656 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 4657 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 4658 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec); 4659 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec, 4660 Op.getOperand(2)); 4661 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT); 4662 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE); 4663 } 4664 } 4665 4666 // If the subvector vector is a fixed-length type, we cannot use subregister 4667 // manipulation to simplify the codegen; we don't know which register of a 4668 // LMUL group contains the specific subvector as we only know the minimum 4669 // register size. Therefore we must slide the vector group up the full 4670 // amount. 4671 if (SubVecVT.isFixedLengthVector()) { 4672 if (OrigIdx == 0 && Vec.isUndef()) 4673 return Op; 4674 MVT ContainerVT = VecVT; 4675 if (VecVT.isFixedLengthVector()) { 4676 ContainerVT = getContainerForFixedLengthVector(VecVT); 4677 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4678 } 4679 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, 4680 DAG.getUNDEF(ContainerVT), SubVec, 4681 DAG.getConstant(0, DL, XLenVT)); 4682 SDValue Mask = 4683 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 4684 // Set the vector length to only the number of elements we care about. Note 4685 // that for slideup this includes the offset. 4686 SDValue VL = 4687 DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT); 4688 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 4689 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec, 4690 SubVec, SlideupAmt, Mask, VL); 4691 if (VecVT.isFixedLengthVector()) 4692 Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); 4693 return DAG.getBitcast(Op.getValueType(), Slideup); 4694 } 4695 4696 unsigned SubRegIdx, RemIdx; 4697 std::tie(SubRegIdx, RemIdx) = 4698 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 4699 VecVT, SubVecVT, OrigIdx, TRI); 4700 4701 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT); 4702 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 4703 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 4704 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 4705 4706 // 1. If the Idx has been completely eliminated and this subvector's size is 4707 // a vector register or a multiple thereof, or the surrounding elements are 4708 // undef, then this is a subvector insert which naturally aligns to a vector 4709 // register. These can easily be handled using subregister manipulation. 4710 // 2. If the subvector is smaller than a vector register, then the insertion 4711 // must preserve the undisturbed elements of the register. We do this by 4712 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type 4713 // (which resolves to a subregister copy), performing a VSLIDEUP to place the 4714 // subvector within the vector register, and an INSERT_SUBVECTOR of that 4715 // LMUL=1 type back into the larger vector (resolving to another subregister 4716 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type 4717 // to avoid allocating a large register group to hold our subvector. 4718 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef())) 4719 return Op; 4720 4721 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements 4722 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy 4723 // (in our case undisturbed). This means we can set up a subvector insertion 4724 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the 4725 // size of the subvector. 4726 MVT InterSubVT = VecVT; 4727 SDValue AlignedExtract = Vec; 4728 unsigned AlignedIdx = OrigIdx - RemIdx; 4729 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 4730 InterSubVT = getLMUL1VT(VecVT); 4731 // Extract a subvector equal to the nearest full vector register type. This 4732 // should resolve to a EXTRACT_SUBREG instruction. 4733 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 4734 DAG.getConstant(AlignedIdx, DL, XLenVT)); 4735 } 4736 4737 SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT); 4738 // For scalable vectors this must be further multiplied by vscale. 4739 SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt); 4740 4741 SDValue Mask, VL; 4742 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 4743 4744 // Construct the vector length corresponding to RemIdx + length(SubVecVT). 4745 VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT); 4746 VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL); 4747 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL); 4748 4749 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT, 4750 DAG.getUNDEF(InterSubVT), SubVec, 4751 DAG.getConstant(0, DL, XLenVT)); 4752 4753 SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT, 4754 AlignedExtract, SubVec, SlideupAmt, Mask, VL); 4755 4756 // If required, insert this subvector back into the correct vector register. 4757 // This should resolve to an INSERT_SUBREG instruction. 4758 if (VecVT.bitsGT(InterSubVT)) 4759 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup, 4760 DAG.getConstant(AlignedIdx, DL, XLenVT)); 4761 4762 // We might have bitcast from a mask type: cast back to the original type if 4763 // required. 4764 return DAG.getBitcast(Op.getSimpleValueType(), Slideup); 4765 } 4766 4767 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, 4768 SelectionDAG &DAG) const { 4769 SDValue Vec = Op.getOperand(0); 4770 MVT SubVecVT = Op.getSimpleValueType(); 4771 MVT VecVT = Vec.getSimpleValueType(); 4772 4773 SDLoc DL(Op); 4774 MVT XLenVT = Subtarget.getXLenVT(); 4775 unsigned OrigIdx = Op.getConstantOperandVal(1); 4776 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 4777 4778 // We don't have the ability to slide mask vectors down indexed by their i1 4779 // elements; the smallest we can do is i8. Often we are able to bitcast to 4780 // equivalent i8 vectors. Note that when extracting a fixed-length vector 4781 // from a scalable one, we might not necessarily have enough scalable 4782 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid. 4783 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) { 4784 if (VecVT.getVectorMinNumElements() >= 8 && 4785 SubVecVT.getVectorMinNumElements() >= 8) { 4786 assert(OrigIdx % 8 == 0 && "Invalid index"); 4787 assert(VecVT.getVectorMinNumElements() % 8 == 0 && 4788 SubVecVT.getVectorMinNumElements() % 8 == 0 && 4789 "Unexpected mask vector lowering"); 4790 OrigIdx /= 8; 4791 SubVecVT = 4792 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8, 4793 SubVecVT.isScalableVector()); 4794 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8, 4795 VecVT.isScalableVector()); 4796 Vec = DAG.getBitcast(VecVT, Vec); 4797 } else { 4798 // We can't slide this mask vector down, indexed by its i1 elements. 4799 // This poses a problem when we wish to extract a scalable vector which 4800 // can't be re-expressed as a larger type. Just choose the slow path and 4801 // extend to a larger type, then truncate back down. 4802 // TODO: We could probably improve this when extracting certain fixed 4803 // from fixed, where we can extract as i8 and shift the correct element 4804 // right to reach the desired subvector? 4805 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8); 4806 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8); 4807 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec); 4808 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec, 4809 Op.getOperand(1)); 4810 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT); 4811 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE); 4812 } 4813 } 4814 4815 // If the subvector vector is a fixed-length type, we cannot use subregister 4816 // manipulation to simplify the codegen; we don't know which register of a 4817 // LMUL group contains the specific subvector as we only know the minimum 4818 // register size. Therefore we must slide the vector group down the full 4819 // amount. 4820 if (SubVecVT.isFixedLengthVector()) { 4821 // With an index of 0 this is a cast-like subvector, which can be performed 4822 // with subregister operations. 4823 if (OrigIdx == 0) 4824 return Op; 4825 MVT ContainerVT = VecVT; 4826 if (VecVT.isFixedLengthVector()) { 4827 ContainerVT = getContainerForFixedLengthVector(VecVT); 4828 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 4829 } 4830 SDValue Mask = 4831 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; 4832 // Set the vector length to only the number of elements we care about. This 4833 // avoids sliding down elements we're going to discard straight away. 4834 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT); 4835 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT); 4836 SDValue Slidedown = 4837 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 4838 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL); 4839 // Now we can use a cast-like subvector extract to get the result. 4840 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 4841 DAG.getConstant(0, DL, XLenVT)); 4842 return DAG.getBitcast(Op.getValueType(), Slidedown); 4843 } 4844 4845 unsigned SubRegIdx, RemIdx; 4846 std::tie(SubRegIdx, RemIdx) = 4847 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 4848 VecVT, SubVecVT, OrigIdx, TRI); 4849 4850 // If the Idx has been completely eliminated then this is a subvector extract 4851 // which naturally aligns to a vector register. These can easily be handled 4852 // using subregister manipulation. 4853 if (RemIdx == 0) 4854 return Op; 4855 4856 // Else we must shift our vector register directly to extract the subvector. 4857 // Do this using VSLIDEDOWN. 4858 4859 // If the vector type is an LMUL-group type, extract a subvector equal to the 4860 // nearest full vector register type. This should resolve to a EXTRACT_SUBREG 4861 // instruction. 4862 MVT InterSubVT = VecVT; 4863 if (VecVT.bitsGT(getLMUL1VT(VecVT))) { 4864 InterSubVT = getLMUL1VT(VecVT); 4865 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec, 4866 DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT)); 4867 } 4868 4869 // Slide this vector register down by the desired number of elements in order 4870 // to place the desired subvector starting at element 0. 4871 SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT); 4872 // For scalable vectors this must be further multiplied by vscale. 4873 SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt); 4874 4875 SDValue Mask, VL; 4876 std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget); 4877 SDValue Slidedown = 4878 DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT, 4879 DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL); 4880 4881 // Now the vector is in the right position, extract our final subvector. This 4882 // should resolve to a COPY. 4883 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown, 4884 DAG.getConstant(0, DL, XLenVT)); 4885 4886 // We might have bitcast from a mask type: cast back to the original type if 4887 // required. 4888 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown); 4889 } 4890 4891 // Lower step_vector to the vid instruction. Any non-identity step value must 4892 // be accounted for my manual expansion. 4893 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op, 4894 SelectionDAG &DAG) const { 4895 SDLoc DL(Op); 4896 MVT VT = Op.getSimpleValueType(); 4897 MVT XLenVT = Subtarget.getXLenVT(); 4898 SDValue Mask, VL; 4899 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); 4900 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL); 4901 uint64_t StepValImm = Op.getConstantOperandVal(0); 4902 if (StepValImm != 1) { 4903 if (isPowerOf2_64(StepValImm)) { 4904 SDValue StepVal = 4905 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, 4906 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT)); 4907 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal); 4908 } else { 4909 SDValue StepVal = lowerScalarSplat( 4910 DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT, 4911 DL, DAG, Subtarget); 4912 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal); 4913 } 4914 } 4915 return StepVec; 4916 } 4917 4918 // Implement vector_reverse using vrgather.vv with indices determined by 4919 // subtracting the id of each element from (VLMAX-1). This will convert 4920 // the indices like so: 4921 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0). 4922 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16. 4923 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op, 4924 SelectionDAG &DAG) const { 4925 SDLoc DL(Op); 4926 MVT VecVT = Op.getSimpleValueType(); 4927 unsigned EltSize = VecVT.getScalarSizeInBits(); 4928 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue(); 4929 4930 unsigned MaxVLMAX = 0; 4931 unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits(); 4932 if (VectorBitsMax != 0) 4933 MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; 4934 4935 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; 4936 MVT IntVT = VecVT.changeVectorElementTypeToInteger(); 4937 4938 // If this is SEW=8 and VLMAX is unknown or more than 256, we need 4939 // to use vrgatherei16.vv. 4940 // TODO: It's also possible to use vrgatherei16.vv for other types to 4941 // decrease register width for the index calculation. 4942 if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) { 4943 // If this is LMUL=8, we have to split before can use vrgatherei16.vv. 4944 // Reverse each half, then reassemble them in reverse order. 4945 // NOTE: It's also possible that after splitting that VLMAX no longer 4946 // requires vrgatherei16.vv. 4947 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) { 4948 SDValue Lo, Hi; 4949 std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); 4950 EVT LoVT, HiVT; 4951 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); 4952 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo); 4953 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi); 4954 // Reassemble the low and high pieces reversed. 4955 // FIXME: This is a CONCAT_VECTORS. 4956 SDValue Res = 4957 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi, 4958 DAG.getIntPtrConstant(0, DL)); 4959 return DAG.getNode( 4960 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo, 4961 DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL)); 4962 } 4963 4964 // Just promote the int type to i16 which will double the LMUL. 4965 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount()); 4966 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; 4967 } 4968 4969 MVT XLenVT = Subtarget.getXLenVT(); 4970 SDValue Mask, VL; 4971 std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget); 4972 4973 // Calculate VLMAX-1 for the desired SEW. 4974 unsigned MinElts = VecVT.getVectorMinNumElements(); 4975 SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT, 4976 DAG.getConstant(MinElts, DL, XLenVT)); 4977 SDValue VLMinus1 = 4978 DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT)); 4979 4980 // Splat VLMAX-1 taking care to handle SEW==64 on RV32. 4981 bool IsRV32E64 = 4982 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64; 4983 SDValue SplatVL; 4984 if (!IsRV32E64) 4985 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1); 4986 else 4987 SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1); 4988 4989 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL); 4990 SDValue Indices = 4991 DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL); 4992 4993 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL); 4994 } 4995 4996 SDValue 4997 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op, 4998 SelectionDAG &DAG) const { 4999 SDLoc DL(Op); 5000 auto *Load = cast<LoadSDNode>(Op); 5001 5002 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5003 Load->getMemoryVT(), 5004 *Load->getMemOperand()) && 5005 "Expecting a correctly-aligned load"); 5006 5007 MVT VT = Op.getSimpleValueType(); 5008 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5009 5010 SDValue VL = 5011 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 5012 5013 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 5014 SDValue NewLoad = DAG.getMemIntrinsicNode( 5015 RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL}, 5016 Load->getMemoryVT(), Load->getMemOperand()); 5017 5018 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget); 5019 return DAG.getMergeValues({Result, Load->getChain()}, DL); 5020 } 5021 5022 SDValue 5023 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, 5024 SelectionDAG &DAG) const { 5025 SDLoc DL(Op); 5026 auto *Store = cast<StoreSDNode>(Op); 5027 5028 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 5029 Store->getMemoryVT(), 5030 *Store->getMemOperand()) && 5031 "Expecting a correctly-aligned store"); 5032 5033 SDValue StoreVal = Store->getValue(); 5034 MVT VT = StoreVal.getSimpleValueType(); 5035 5036 // If the size less than a byte, we need to pad with zeros to make a byte. 5037 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { 5038 VT = MVT::v8i1; 5039 StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, 5040 DAG.getConstant(0, DL, VT), StoreVal, 5041 DAG.getIntPtrConstant(0, DL)); 5042 } 5043 5044 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5045 5046 SDValue VL = 5047 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 5048 5049 SDValue NewValue = 5050 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); 5051 return DAG.getMemIntrinsicNode( 5052 RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), 5053 {Store->getChain(), NewValue, Store->getBasePtr(), VL}, 5054 Store->getMemoryVT(), Store->getMemOperand()); 5055 } 5056 5057 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, 5058 SelectionDAG &DAG) const { 5059 SDLoc DL(Op); 5060 MVT VT = Op.getSimpleValueType(); 5061 5062 const auto *MemSD = cast<MemSDNode>(Op); 5063 EVT MemVT = MemSD->getMemoryVT(); 5064 MachineMemOperand *MMO = MemSD->getMemOperand(); 5065 SDValue Chain = MemSD->getChain(); 5066 SDValue BasePtr = MemSD->getBasePtr(); 5067 5068 SDValue Mask, PassThru, VL; 5069 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) { 5070 Mask = VPLoad->getMask(); 5071 PassThru = DAG.getUNDEF(VT); 5072 VL = VPLoad->getVectorLength(); 5073 } else { 5074 const auto *MLoad = cast<MaskedLoadSDNode>(Op); 5075 Mask = MLoad->getMask(); 5076 PassThru = MLoad->getPassThru(); 5077 } 5078 5079 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 5080 5081 MVT XLenVT = Subtarget.getXLenVT(); 5082 5083 MVT ContainerVT = VT; 5084 if (VT.isFixedLengthVector()) { 5085 ContainerVT = getContainerForFixedLengthVector(VT); 5086 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 5087 if (!IsUnmasked) { 5088 MVT MaskVT = 5089 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 5090 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5091 } 5092 } 5093 5094 if (!VL) 5095 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 5096 5097 unsigned IntID = 5098 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; 5099 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 5100 if (!IsUnmasked) 5101 Ops.push_back(PassThru); 5102 Ops.push_back(BasePtr); 5103 if (!IsUnmasked) 5104 Ops.push_back(Mask); 5105 Ops.push_back(VL); 5106 if (!IsUnmasked) 5107 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 5108 5109 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 5110 5111 SDValue Result = 5112 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 5113 Chain = Result.getValue(1); 5114 5115 if (VT.isFixedLengthVector()) 5116 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 5117 5118 return DAG.getMergeValues({Result, Chain}, DL); 5119 } 5120 5121 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, 5122 SelectionDAG &DAG) const { 5123 SDLoc DL(Op); 5124 5125 const auto *MemSD = cast<MemSDNode>(Op); 5126 EVT MemVT = MemSD->getMemoryVT(); 5127 MachineMemOperand *MMO = MemSD->getMemOperand(); 5128 SDValue Chain = MemSD->getChain(); 5129 SDValue BasePtr = MemSD->getBasePtr(); 5130 SDValue Val, Mask, VL; 5131 5132 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { 5133 Val = VPStore->getValue(); 5134 Mask = VPStore->getMask(); 5135 VL = VPStore->getVectorLength(); 5136 } else { 5137 const auto *MStore = cast<MaskedStoreSDNode>(Op); 5138 Val = MStore->getValue(); 5139 Mask = MStore->getMask(); 5140 } 5141 5142 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 5143 5144 MVT VT = Val.getSimpleValueType(); 5145 MVT XLenVT = Subtarget.getXLenVT(); 5146 5147 MVT ContainerVT = VT; 5148 if (VT.isFixedLengthVector()) { 5149 ContainerVT = getContainerForFixedLengthVector(VT); 5150 5151 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 5152 if (!IsUnmasked) { 5153 MVT MaskVT = 5154 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 5155 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5156 } 5157 } 5158 5159 if (!VL) 5160 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 5161 5162 unsigned IntID = 5163 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask; 5164 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 5165 Ops.push_back(Val); 5166 Ops.push_back(BasePtr); 5167 if (!IsUnmasked) 5168 Ops.push_back(Mask); 5169 Ops.push_back(VL); 5170 5171 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 5172 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 5173 } 5174 5175 SDValue 5176 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op, 5177 SelectionDAG &DAG) const { 5178 MVT InVT = Op.getOperand(0).getSimpleValueType(); 5179 MVT ContainerVT = getContainerForFixedLengthVector(InVT); 5180 5181 MVT VT = Op.getSimpleValueType(); 5182 5183 SDValue Op1 = 5184 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget); 5185 SDValue Op2 = 5186 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 5187 5188 SDLoc DL(Op); 5189 SDValue VL = 5190 DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); 5191 5192 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 5193 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 5194 5195 SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2, 5196 Op.getOperand(2), Mask, VL); 5197 5198 return convertFromScalableVector(VT, Cmp, DAG, Subtarget); 5199 } 5200 5201 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( 5202 SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const { 5203 MVT VT = Op.getSimpleValueType(); 5204 5205 if (VT.getVectorElementType() == MVT::i1) 5206 return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false); 5207 5208 return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true); 5209 } 5210 5211 SDValue 5212 RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op, 5213 SelectionDAG &DAG) const { 5214 unsigned Opc; 5215 switch (Op.getOpcode()) { 5216 default: llvm_unreachable("Unexpected opcode!"); 5217 case ISD::SHL: Opc = RISCVISD::SHL_VL; break; 5218 case ISD::SRA: Opc = RISCVISD::SRA_VL; break; 5219 case ISD::SRL: Opc = RISCVISD::SRL_VL; break; 5220 } 5221 5222 return lowerToScalableOp(Op, DAG, Opc); 5223 } 5224 5225 // Lower vector ABS to smax(X, sub(0, X)). 5226 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { 5227 SDLoc DL(Op); 5228 MVT VT = Op.getSimpleValueType(); 5229 SDValue X = Op.getOperand(0); 5230 5231 assert(VT.isFixedLengthVector() && "Unexpected type"); 5232 5233 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5234 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); 5235 5236 SDValue Mask, VL; 5237 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5238 5239 SDValue SplatZero = 5240 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 5241 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 5242 SDValue NegX = 5243 DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL); 5244 SDValue Max = 5245 DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL); 5246 5247 return convertFromScalableVector(VT, Max, DAG, Subtarget); 5248 } 5249 5250 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV( 5251 SDValue Op, SelectionDAG &DAG) const { 5252 SDLoc DL(Op); 5253 MVT VT = Op.getSimpleValueType(); 5254 SDValue Mag = Op.getOperand(0); 5255 SDValue Sign = Op.getOperand(1); 5256 assert(Mag.getValueType() == Sign.getValueType() && 5257 "Can only handle COPYSIGN with matching types."); 5258 5259 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5260 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget); 5261 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget); 5262 5263 SDValue Mask, VL; 5264 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5265 5266 SDValue CopySign = 5267 DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL); 5268 5269 return convertFromScalableVector(VT, CopySign, DAG, Subtarget); 5270 } 5271 5272 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( 5273 SDValue Op, SelectionDAG &DAG) const { 5274 MVT VT = Op.getSimpleValueType(); 5275 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5276 5277 MVT I1ContainerVT = 5278 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 5279 5280 SDValue CC = 5281 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget); 5282 SDValue Op1 = 5283 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget); 5284 SDValue Op2 = 5285 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget); 5286 5287 SDLoc DL(Op); 5288 SDValue Mask, VL; 5289 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5290 5291 SDValue Select = 5292 DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL); 5293 5294 return convertFromScalableVector(VT, Select, DAG, Subtarget); 5295 } 5296 5297 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG, 5298 unsigned NewOpc, 5299 bool HasMask) const { 5300 MVT VT = Op.getSimpleValueType(); 5301 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5302 5303 // Create list of operands by converting existing ones to scalable types. 5304 SmallVector<SDValue, 6> Ops; 5305 for (const SDValue &V : Op->op_values()) { 5306 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 5307 5308 // Pass through non-vector operands. 5309 if (!V.getValueType().isVector()) { 5310 Ops.push_back(V); 5311 continue; 5312 } 5313 5314 // "cast" fixed length vector to a scalable vector. 5315 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) && 5316 "Only fixed length vectors are supported!"); 5317 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 5318 } 5319 5320 SDLoc DL(Op); 5321 SDValue Mask, VL; 5322 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); 5323 if (HasMask) 5324 Ops.push_back(Mask); 5325 Ops.push_back(VL); 5326 5327 SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops); 5328 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget); 5329 } 5330 5331 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node: 5332 // * Operands of each node are assumed to be in the same order. 5333 // * The EVL operand is promoted from i32 to i64 on RV64. 5334 // * Fixed-length vectors are converted to their scalable-vector container 5335 // types. 5336 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, 5337 unsigned RISCVISDOpc) const { 5338 SDLoc DL(Op); 5339 MVT VT = Op.getSimpleValueType(); 5340 SmallVector<SDValue, 4> Ops; 5341 5342 for (const auto &OpIdx : enumerate(Op->ops())) { 5343 SDValue V = OpIdx.value(); 5344 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); 5345 // Pass through operands which aren't fixed-length vectors. 5346 if (!V.getValueType().isFixedLengthVector()) { 5347 Ops.push_back(V); 5348 continue; 5349 } 5350 // "cast" fixed length vector to a scalable vector. 5351 MVT OpVT = V.getSimpleValueType(); 5352 MVT ContainerVT = getContainerForFixedLengthVector(OpVT); 5353 assert(useRVVForFixedLengthVectorVT(OpVT) && 5354 "Only fixed length vectors are supported!"); 5355 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget)); 5356 } 5357 5358 if (!VT.isFixedLengthVector()) 5359 return DAG.getNode(RISCVISDOpc, DL, VT, Ops); 5360 5361 MVT ContainerVT = getContainerForFixedLengthVector(VT); 5362 5363 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops); 5364 5365 return convertFromScalableVector(VT, VPOp, DAG, Subtarget); 5366 } 5367 5368 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be 5369 // matched to a RVV indexed load. The RVV indexed load instructions only 5370 // support the "unsigned unscaled" addressing mode; indices are implicitly 5371 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 5372 // signed or scaled indexing is extended to the XLEN value type and scaled 5373 // accordingly. 5374 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op, 5375 SelectionDAG &DAG) const { 5376 SDLoc DL(Op); 5377 MVT VT = Op.getSimpleValueType(); 5378 5379 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 5380 EVT MemVT = MemSD->getMemoryVT(); 5381 MachineMemOperand *MMO = MemSD->getMemOperand(); 5382 SDValue Chain = MemSD->getChain(); 5383 SDValue BasePtr = MemSD->getBasePtr(); 5384 5385 ISD::LoadExtType LoadExtType; 5386 SDValue Index, Mask, PassThru, VL; 5387 5388 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) { 5389 Index = VPGN->getIndex(); 5390 Mask = VPGN->getMask(); 5391 PassThru = DAG.getUNDEF(VT); 5392 VL = VPGN->getVectorLength(); 5393 // VP doesn't support extending loads. 5394 LoadExtType = ISD::NON_EXTLOAD; 5395 } else { 5396 // Else it must be a MGATHER. 5397 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode()); 5398 Index = MGN->getIndex(); 5399 Mask = MGN->getMask(); 5400 PassThru = MGN->getPassThru(); 5401 LoadExtType = MGN->getExtensionType(); 5402 } 5403 5404 MVT IndexVT = Index.getSimpleValueType(); 5405 MVT XLenVT = Subtarget.getXLenVT(); 5406 5407 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 5408 "Unexpected VTs!"); 5409 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 5410 // Targets have to explicitly opt-in for extending vector loads. 5411 assert(LoadExtType == ISD::NON_EXTLOAD && 5412 "Unexpected extending MGATHER/VP_GATHER"); 5413 (void)LoadExtType; 5414 5415 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 5416 // the selection of the masked intrinsics doesn't do this for us. 5417 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 5418 5419 MVT ContainerVT = VT; 5420 if (VT.isFixedLengthVector()) { 5421 // We need to use the larger of the result and index type to determine the 5422 // scalable type to use so we don't increase LMUL for any operand/result. 5423 if (VT.bitsGE(IndexVT)) { 5424 ContainerVT = getContainerForFixedLengthVector(VT); 5425 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 5426 ContainerVT.getVectorElementCount()); 5427 } else { 5428 IndexVT = getContainerForFixedLengthVector(IndexVT); 5429 ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(), 5430 IndexVT.getVectorElementCount()); 5431 } 5432 5433 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 5434 5435 if (!IsUnmasked) { 5436 MVT MaskVT = 5437 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 5438 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5439 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); 5440 } 5441 } 5442 5443 if (!VL) 5444 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 5445 5446 unsigned IntID = 5447 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask; 5448 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 5449 if (!IsUnmasked) 5450 Ops.push_back(PassThru); 5451 Ops.push_back(BasePtr); 5452 Ops.push_back(Index); 5453 if (!IsUnmasked) 5454 Ops.push_back(Mask); 5455 Ops.push_back(VL); 5456 if (!IsUnmasked) 5457 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); 5458 5459 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); 5460 SDValue Result = 5461 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); 5462 Chain = Result.getValue(1); 5463 5464 if (VT.isFixedLengthVector()) 5465 Result = convertFromScalableVector(VT, Result, DAG, Subtarget); 5466 5467 return DAG.getMergeValues({Result, Chain}, DL); 5468 } 5469 5470 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be 5471 // matched to a RVV indexed store. The RVV indexed store instructions only 5472 // support the "unsigned unscaled" addressing mode; indices are implicitly 5473 // zero-extended or truncated to XLEN and are treated as byte offsets. Any 5474 // signed or scaled indexing is extended to the XLEN value type and scaled 5475 // accordingly. 5476 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op, 5477 SelectionDAG &DAG) const { 5478 SDLoc DL(Op); 5479 const auto *MemSD = cast<MemSDNode>(Op.getNode()); 5480 EVT MemVT = MemSD->getMemoryVT(); 5481 MachineMemOperand *MMO = MemSD->getMemOperand(); 5482 SDValue Chain = MemSD->getChain(); 5483 SDValue BasePtr = MemSD->getBasePtr(); 5484 5485 bool IsTruncatingStore = false; 5486 SDValue Index, Mask, Val, VL; 5487 5488 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) { 5489 Index = VPSN->getIndex(); 5490 Mask = VPSN->getMask(); 5491 Val = VPSN->getValue(); 5492 VL = VPSN->getVectorLength(); 5493 // VP doesn't support truncating stores. 5494 IsTruncatingStore = false; 5495 } else { 5496 // Else it must be a MSCATTER. 5497 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode()); 5498 Index = MSN->getIndex(); 5499 Mask = MSN->getMask(); 5500 Val = MSN->getValue(); 5501 IsTruncatingStore = MSN->isTruncatingStore(); 5502 } 5503 5504 MVT VT = Val.getSimpleValueType(); 5505 MVT IndexVT = Index.getSimpleValueType(); 5506 MVT XLenVT = Subtarget.getXLenVT(); 5507 5508 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 5509 "Unexpected VTs!"); 5510 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type"); 5511 // Targets have to explicitly opt-in for extending vector loads and 5512 // truncating vector stores. 5513 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER"); 5514 (void)IsTruncatingStore; 5515 5516 // If the mask is known to be all ones, optimize to an unmasked intrinsic; 5517 // the selection of the masked intrinsics doesn't do this for us. 5518 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); 5519 5520 MVT ContainerVT = VT; 5521 if (VT.isFixedLengthVector()) { 5522 // We need to use the larger of the value and index type to determine the 5523 // scalable type to use so we don't increase LMUL for any operand/result. 5524 if (VT.bitsGE(IndexVT)) { 5525 ContainerVT = getContainerForFixedLengthVector(VT); 5526 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), 5527 ContainerVT.getVectorElementCount()); 5528 } else { 5529 IndexVT = getContainerForFixedLengthVector(IndexVT); 5530 ContainerVT = MVT::getVectorVT(VT.getVectorElementType(), 5531 IndexVT.getVectorElementCount()); 5532 } 5533 5534 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget); 5535 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); 5536 5537 if (!IsUnmasked) { 5538 MVT MaskVT = 5539 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 5540 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); 5541 } 5542 } 5543 5544 if (!VL) 5545 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; 5546 5547 unsigned IntID = 5548 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask; 5549 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; 5550 Ops.push_back(Val); 5551 Ops.push_back(BasePtr); 5552 Ops.push_back(Index); 5553 if (!IsUnmasked) 5554 Ops.push_back(Mask); 5555 Ops.push_back(VL); 5556 5557 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, 5558 DAG.getVTList(MVT::Other), Ops, MemVT, MMO); 5559 } 5560 5561 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op, 5562 SelectionDAG &DAG) const { 5563 const MVT XLenVT = Subtarget.getXLenVT(); 5564 SDLoc DL(Op); 5565 SDValue Chain = Op->getOperand(0); 5566 SDValue SysRegNo = DAG.getTargetConstant( 5567 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 5568 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other); 5569 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo); 5570 5571 // Encoding used for rounding mode in RISCV differs from that used in 5572 // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a 5573 // table, which consists of a sequence of 4-bit fields, each representing 5574 // corresponding FLT_ROUNDS mode. 5575 static const int Table = 5576 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) | 5577 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) | 5578 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) | 5579 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) | 5580 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM); 5581 5582 SDValue Shift = 5583 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT)); 5584 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 5585 DAG.getConstant(Table, DL, XLenVT), Shift); 5586 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 5587 DAG.getConstant(7, DL, XLenVT)); 5588 5589 return DAG.getMergeValues({Masked, Chain}, DL); 5590 } 5591 5592 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op, 5593 SelectionDAG &DAG) const { 5594 const MVT XLenVT = Subtarget.getXLenVT(); 5595 SDLoc DL(Op); 5596 SDValue Chain = Op->getOperand(0); 5597 SDValue RMValue = Op->getOperand(1); 5598 SDValue SysRegNo = DAG.getTargetConstant( 5599 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT); 5600 5601 // Encoding used for rounding mode in RISCV differs from that used in 5602 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in 5603 // a table, which consists of a sequence of 4-bit fields, each representing 5604 // corresponding RISCV mode. 5605 static const unsigned Table = 5606 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) | 5607 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) | 5608 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) | 5609 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) | 5610 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway)); 5611 5612 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue, 5613 DAG.getConstant(2, DL, XLenVT)); 5614 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT, 5615 DAG.getConstant(Table, DL, XLenVT), Shift); 5616 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted, 5617 DAG.getConstant(0x7, DL, XLenVT)); 5618 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo, 5619 RMValue); 5620 } 5621 5622 // Returns the opcode of the target-specific SDNode that implements the 32-bit 5623 // form of the given Opcode. 5624 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { 5625 switch (Opcode) { 5626 default: 5627 llvm_unreachable("Unexpected opcode"); 5628 case ISD::SHL: 5629 return RISCVISD::SLLW; 5630 case ISD::SRA: 5631 return RISCVISD::SRAW; 5632 case ISD::SRL: 5633 return RISCVISD::SRLW; 5634 case ISD::SDIV: 5635 return RISCVISD::DIVW; 5636 case ISD::UDIV: 5637 return RISCVISD::DIVUW; 5638 case ISD::UREM: 5639 return RISCVISD::REMUW; 5640 case ISD::ROTL: 5641 return RISCVISD::ROLW; 5642 case ISD::ROTR: 5643 return RISCVISD::RORW; 5644 case RISCVISD::GREV: 5645 return RISCVISD::GREVW; 5646 case RISCVISD::GORC: 5647 return RISCVISD::GORCW; 5648 } 5649 } 5650 5651 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG 5652 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would 5653 // otherwise be promoted to i64, making it difficult to select the 5654 // SLLW/DIVUW/.../*W later one because the fact the operation was originally of 5655 // type i8/i16/i32 is lost. 5656 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, 5657 unsigned ExtOpc = ISD::ANY_EXTEND) { 5658 SDLoc DL(N); 5659 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 5660 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); 5661 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); 5662 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 5663 // ReplaceNodeResults requires we maintain the same type for the return value. 5664 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); 5665 } 5666 5667 // Converts the given 32-bit operation to a i64 operation with signed extension 5668 // semantic to reduce the signed extension instructions. 5669 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { 5670 SDLoc DL(N); 5671 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 5672 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 5673 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); 5674 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 5675 DAG.getValueType(MVT::i32)); 5676 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); 5677 } 5678 5679 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, 5680 SmallVectorImpl<SDValue> &Results, 5681 SelectionDAG &DAG) const { 5682 SDLoc DL(N); 5683 switch (N->getOpcode()) { 5684 default: 5685 llvm_unreachable("Don't know how to custom type legalize this operation!"); 5686 case ISD::STRICT_FP_TO_SINT: 5687 case ISD::STRICT_FP_TO_UINT: 5688 case ISD::FP_TO_SINT: 5689 case ISD::FP_TO_UINT: { 5690 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5691 "Unexpected custom legalisation"); 5692 bool IsStrict = N->isStrictFPOpcode(); 5693 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || 5694 N->getOpcode() == ISD::STRICT_FP_TO_SINT; 5695 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); 5696 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != 5697 TargetLowering::TypeSoftenFloat) { 5698 // FIXME: Support strict FP. 5699 if (IsStrict) 5700 return; 5701 if (!isTypeLegal(Op0.getValueType())) 5702 return; 5703 unsigned Opc = 5704 IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64; 5705 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0); 5706 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 5707 return; 5708 } 5709 // If the FP type needs to be softened, emit a library call using the 'si' 5710 // version. If we left it to default legalization we'd end up with 'di'. If 5711 // the FP type doesn't need to be softened just let generic type 5712 // legalization promote the result type. 5713 RTLIB::Libcall LC; 5714 if (IsSigned) 5715 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 5716 else 5717 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 5718 MakeLibCallOptions CallOptions; 5719 EVT OpVT = Op0.getValueType(); 5720 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 5721 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 5722 SDValue Result; 5723 std::tie(Result, Chain) = 5724 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 5725 Results.push_back(Result); 5726 if (IsStrict) 5727 Results.push_back(Chain); 5728 break; 5729 } 5730 case ISD::READCYCLECOUNTER: { 5731 assert(!Subtarget.is64Bit() && 5732 "READCYCLECOUNTER only has custom type legalization on riscv32"); 5733 5734 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 5735 SDValue RCW = 5736 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 5737 5738 Results.push_back( 5739 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 5740 Results.push_back(RCW.getValue(2)); 5741 break; 5742 } 5743 case ISD::MUL: { 5744 unsigned Size = N->getSimpleValueType(0).getSizeInBits(); 5745 unsigned XLen = Subtarget.getXLen(); 5746 // This multiply needs to be expanded, try to use MULHSU+MUL if possible. 5747 if (Size > XLen) { 5748 assert(Size == (XLen * 2) && "Unexpected custom legalisation"); 5749 SDValue LHS = N->getOperand(0); 5750 SDValue RHS = N->getOperand(1); 5751 APInt HighMask = APInt::getHighBitsSet(Size, XLen); 5752 5753 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask); 5754 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask); 5755 // We need exactly one side to be unsigned. 5756 if (LHSIsU == RHSIsU) 5757 return; 5758 5759 auto MakeMULPair = [&](SDValue S, SDValue U) { 5760 MVT XLenVT = Subtarget.getXLenVT(); 5761 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S); 5762 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U); 5763 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U); 5764 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U); 5765 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi); 5766 }; 5767 5768 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen; 5769 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen; 5770 5771 // The other operand should be signed, but still prefer MULH when 5772 // possible. 5773 if (RHSIsU && LHSIsS && !RHSIsS) 5774 Results.push_back(MakeMULPair(LHS, RHS)); 5775 else if (LHSIsU && RHSIsS && !LHSIsS) 5776 Results.push_back(MakeMULPair(RHS, LHS)); 5777 5778 return; 5779 } 5780 LLVM_FALLTHROUGH; 5781 } 5782 case ISD::ADD: 5783 case ISD::SUB: 5784 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5785 "Unexpected custom legalisation"); 5786 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 5787 break; 5788 case ISD::SHL: 5789 case ISD::SRA: 5790 case ISD::SRL: 5791 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5792 "Unexpected custom legalisation"); 5793 if (N->getOperand(1).getOpcode() != ISD::Constant) { 5794 Results.push_back(customLegalizeToWOp(N, DAG)); 5795 break; 5796 } 5797 5798 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is 5799 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the 5800 // shift amount. 5801 if (N->getOpcode() == ISD::SHL) { 5802 SDLoc DL(N); 5803 SDValue NewOp0 = 5804 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 5805 SDValue NewOp1 = 5806 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1)); 5807 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1); 5808 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, 5809 DAG.getValueType(MVT::i32)); 5810 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 5811 } 5812 5813 break; 5814 case ISD::ROTL: 5815 case ISD::ROTR: 5816 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5817 "Unexpected custom legalisation"); 5818 Results.push_back(customLegalizeToWOp(N, DAG)); 5819 break; 5820 case ISD::CTTZ: 5821 case ISD::CTTZ_ZERO_UNDEF: 5822 case ISD::CTLZ: 5823 case ISD::CTLZ_ZERO_UNDEF: { 5824 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5825 "Unexpected custom legalisation"); 5826 5827 SDValue NewOp0 = 5828 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 5829 bool IsCTZ = 5830 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF; 5831 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW; 5832 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0); 5833 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 5834 return; 5835 } 5836 case ISD::SDIV: 5837 case ISD::UDIV: 5838 case ISD::UREM: { 5839 MVT VT = N->getSimpleValueType(0); 5840 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && 5841 Subtarget.is64Bit() && Subtarget.hasStdExtM() && 5842 "Unexpected custom legalisation"); 5843 // Don't promote division/remainder by constant since we should expand those 5844 // to multiply by magic constant. 5845 // FIXME: What if the expansion is disabled for minsize. 5846 if (N->getOperand(1).getOpcode() == ISD::Constant) 5847 return; 5848 5849 // If the input is i32, use ANY_EXTEND since the W instructions don't read 5850 // the upper 32 bits. For other types we need to sign or zero extend 5851 // based on the opcode. 5852 unsigned ExtOpc = ISD::ANY_EXTEND; 5853 if (VT != MVT::i32) 5854 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND 5855 : ISD::ZERO_EXTEND; 5856 5857 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc)); 5858 break; 5859 } 5860 case ISD::UADDO: 5861 case ISD::USUBO: { 5862 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5863 "Unexpected custom legalisation"); 5864 bool IsAdd = N->getOpcode() == ISD::UADDO; 5865 // Create an ADDW or SUBW. 5866 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 5867 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 5868 SDValue Res = 5869 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS); 5870 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res, 5871 DAG.getValueType(MVT::i32)); 5872 5873 // Sign extend the LHS and perform an unsigned compare with the ADDW result. 5874 // Since the inputs are sign extended from i32, this is equivalent to 5875 // comparing the lower 32 bits. 5876 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 5877 SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS, 5878 IsAdd ? ISD::SETULT : ISD::SETUGT); 5879 5880 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 5881 Results.push_back(Overflow); 5882 return; 5883 } 5884 case ISD::UADDSAT: 5885 case ISD::USUBSAT: { 5886 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5887 "Unexpected custom legalisation"); 5888 if (Subtarget.hasStdExtZbb()) { 5889 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using 5890 // sign extend allows overflow of the lower 32 bits to be detected on 5891 // the promoted size. 5892 SDValue LHS = 5893 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0)); 5894 SDValue RHS = 5895 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1)); 5896 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS); 5897 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 5898 return; 5899 } 5900 5901 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom 5902 // promotion for UADDO/USUBO. 5903 Results.push_back(expandAddSubSat(N, DAG)); 5904 return; 5905 } 5906 case ISD::BITCAST: { 5907 EVT VT = N->getValueType(0); 5908 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!"); 5909 SDValue Op0 = N->getOperand(0); 5910 EVT Op0VT = Op0.getValueType(); 5911 MVT XLenVT = Subtarget.getXLenVT(); 5912 if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) { 5913 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); 5914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); 5915 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() && 5916 Subtarget.hasStdExtF()) { 5917 SDValue FPConv = 5918 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 5919 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 5920 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() && 5921 isTypeLegal(Op0VT)) { 5922 // Custom-legalize bitcasts from fixed-length vector types to illegal 5923 // scalar types in order to improve codegen. Bitcast the vector to a 5924 // one-element vector type whose element type is the same as the result 5925 // type, and extract the first element. 5926 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1); 5927 if (isTypeLegal(BVT)) { 5928 SDValue BVec = DAG.getBitcast(BVT, Op0); 5929 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, 5930 DAG.getConstant(0, DL, XLenVT))); 5931 } 5932 } 5933 break; 5934 } 5935 case RISCVISD::GREV: 5936 case RISCVISD::GORC: { 5937 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5938 "Unexpected custom legalisation"); 5939 assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant"); 5940 // This is similar to customLegalizeToWOp, except that we pass the second 5941 // operand (a TargetConstant) straight through: it is already of type 5942 // XLenVT. 5943 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); 5944 SDValue NewOp0 = 5945 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 5946 SDValue NewOp1 = 5947 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 5948 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); 5949 // ReplaceNodeResults requires we maintain the same type for the return 5950 // value. 5951 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 5952 break; 5953 } 5954 case RISCVISD::SHFL: { 5955 // There is no SHFLIW instruction, but we can just promote the operation. 5956 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5957 "Unexpected custom legalisation"); 5958 assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant"); 5959 SDValue NewOp0 = 5960 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 5961 SDValue NewOp1 = 5962 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 5963 SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1); 5964 // ReplaceNodeResults requires we maintain the same type for the return 5965 // value. 5966 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes)); 5967 break; 5968 } 5969 case ISD::BSWAP: 5970 case ISD::BITREVERSE: { 5971 MVT VT = N->getSimpleValueType(0); 5972 MVT XLenVT = Subtarget.getXLenVT(); 5973 assert((VT == MVT::i8 || VT == MVT::i16 || 5974 (VT == MVT::i32 && Subtarget.is64Bit())) && 5975 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation"); 5976 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0)); 5977 unsigned Imm = VT.getSizeInBits() - 1; 5978 // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits. 5979 if (N->getOpcode() == ISD::BSWAP) 5980 Imm &= ~0x7U; 5981 unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV; 5982 SDValue GREVI = 5983 DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT)); 5984 // ReplaceNodeResults requires we maintain the same type for the return 5985 // value. 5986 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI)); 5987 break; 5988 } 5989 case ISD::FSHL: 5990 case ISD::FSHR: { 5991 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 5992 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation"); 5993 SDValue NewOp0 = 5994 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); 5995 SDValue NewOp1 = 5996 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 5997 SDValue NewOp2 = 5998 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 5999 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits. 6000 // Mask the shift amount to 5 bits. 6001 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 6002 DAG.getConstant(0x1f, DL, MVT::i64)); 6003 unsigned Opc = 6004 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW; 6005 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2); 6006 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp)); 6007 break; 6008 } 6009 case ISD::EXTRACT_VECTOR_ELT: { 6010 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element 6011 // type is illegal (currently only vXi64 RV32). 6012 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are 6013 // transferred to the destination register. We issue two of these from the 6014 // upper- and lower- halves of the SEW-bit vector element, slid down to the 6015 // first element. 6016 SDValue Vec = N->getOperand(0); 6017 SDValue Idx = N->getOperand(1); 6018 6019 // The vector type hasn't been legalized yet so we can't issue target 6020 // specific nodes if it needs legalization. 6021 // FIXME: We would manually legalize if it's important. 6022 if (!isTypeLegal(Vec.getValueType())) 6023 return; 6024 6025 MVT VecVT = Vec.getSimpleValueType(); 6026 6027 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 && 6028 VecVT.getVectorElementType() == MVT::i64 && 6029 "Unexpected EXTRACT_VECTOR_ELT legalization"); 6030 6031 // If this is a fixed vector, we need to convert it to a scalable vector. 6032 MVT ContainerVT = VecVT; 6033 if (VecVT.isFixedLengthVector()) { 6034 ContainerVT = getContainerForFixedLengthVector(VecVT); 6035 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); 6036 } 6037 6038 MVT XLenVT = Subtarget.getXLenVT(); 6039 6040 // Use a VL of 1 to avoid processing more elements than we need. 6041 MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); 6042 SDValue VL = DAG.getConstant(1, DL, XLenVT); 6043 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 6044 6045 // Unless the index is known to be 0, we must slide the vector down to get 6046 // the desired element into index 0. 6047 if (!isNullConstant(Idx)) { 6048 Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, 6049 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL); 6050 } 6051 6052 // Extract the lower XLEN bits of the correct vector element. 6053 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 6054 6055 // To extract the upper XLEN bits of the vector element, shift the first 6056 // element right by 32 bits and re-extract the lower XLEN bits. 6057 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, 6058 DAG.getConstant(32, DL, XLenVT), VL); 6059 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, 6060 ThirtyTwoV, Mask, VL); 6061 6062 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 6063 6064 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 6065 break; 6066 } 6067 case ISD::INTRINSIC_WO_CHAIN: { 6068 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 6069 switch (IntNo) { 6070 default: 6071 llvm_unreachable( 6072 "Don't know how to custom type legalize this intrinsic!"); 6073 case Intrinsic::riscv_orc_b: { 6074 // Lower to the GORCI encoding for orc.b with the operand extended. 6075 SDValue NewOp = 6076 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 6077 // If Zbp is enabled, use GORCIW which will sign extend the result. 6078 unsigned Opc = 6079 Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC; 6080 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp, 6081 DAG.getConstant(7, DL, MVT::i64)); 6082 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 6083 return; 6084 } 6085 case Intrinsic::riscv_grev: 6086 case Intrinsic::riscv_gorc: { 6087 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 6088 "Unexpected custom legalisation"); 6089 SDValue NewOp1 = 6090 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 6091 SDValue NewOp2 = 6092 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 6093 unsigned Opc = 6094 IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW; 6095 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2); 6096 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 6097 break; 6098 } 6099 case Intrinsic::riscv_shfl: 6100 case Intrinsic::riscv_unshfl: { 6101 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 6102 "Unexpected custom legalisation"); 6103 SDValue NewOp1 = 6104 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 6105 SDValue NewOp2 = 6106 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 6107 unsigned Opc = 6108 IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW; 6109 if (isa<ConstantSDNode>(N->getOperand(2))) { 6110 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2, 6111 DAG.getConstant(0xf, DL, MVT::i64)); 6112 Opc = 6113 IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL; 6114 } 6115 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2); 6116 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 6117 break; 6118 } 6119 case Intrinsic::riscv_bcompress: 6120 case Intrinsic::riscv_bdecompress: { 6121 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 6122 "Unexpected custom legalisation"); 6123 SDValue NewOp1 = 6124 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); 6125 SDValue NewOp2 = 6126 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); 6127 unsigned Opc = IntNo == Intrinsic::riscv_bcompress 6128 ? RISCVISD::BCOMPRESSW 6129 : RISCVISD::BDECOMPRESSW; 6130 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2); 6131 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); 6132 break; 6133 } 6134 case Intrinsic::riscv_vmv_x_s: { 6135 EVT VT = N->getValueType(0); 6136 MVT XLenVT = Subtarget.getXLenVT(); 6137 if (VT.bitsLT(XLenVT)) { 6138 // Simple case just extract using vmv.x.s and truncate. 6139 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL, 6140 Subtarget.getXLenVT(), N->getOperand(1)); 6141 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract)); 6142 return; 6143 } 6144 6145 assert(VT == MVT::i64 && !Subtarget.is64Bit() && 6146 "Unexpected custom legalization"); 6147 6148 // We need to do the move in two steps. 6149 SDValue Vec = N->getOperand(1); 6150 MVT VecVT = Vec.getSimpleValueType(); 6151 6152 // First extract the lower XLEN bits of the element. 6153 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec); 6154 6155 // To extract the upper XLEN bits of the vector element, shift the first 6156 // element right by 32 bits and re-extract the lower XLEN bits. 6157 SDValue VL = DAG.getConstant(1, DL, XLenVT); 6158 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 6159 SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL); 6160 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, 6161 DAG.getConstant(32, DL, XLenVT), VL); 6162 SDValue LShr32 = 6163 DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL); 6164 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32); 6165 6166 Results.push_back( 6167 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi)); 6168 break; 6169 } 6170 } 6171 break; 6172 } 6173 case ISD::VECREDUCE_ADD: 6174 case ISD::VECREDUCE_AND: 6175 case ISD::VECREDUCE_OR: 6176 case ISD::VECREDUCE_XOR: 6177 case ISD::VECREDUCE_SMAX: 6178 case ISD::VECREDUCE_UMAX: 6179 case ISD::VECREDUCE_SMIN: 6180 case ISD::VECREDUCE_UMIN: 6181 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG)) 6182 Results.push_back(V); 6183 break; 6184 case ISD::VP_REDUCE_ADD: 6185 case ISD::VP_REDUCE_AND: 6186 case ISD::VP_REDUCE_OR: 6187 case ISD::VP_REDUCE_XOR: 6188 case ISD::VP_REDUCE_SMAX: 6189 case ISD::VP_REDUCE_UMAX: 6190 case ISD::VP_REDUCE_SMIN: 6191 case ISD::VP_REDUCE_UMIN: 6192 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG)) 6193 Results.push_back(V); 6194 break; 6195 case ISD::FLT_ROUNDS_: { 6196 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other); 6197 SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0)); 6198 Results.push_back(Res.getValue(0)); 6199 Results.push_back(Res.getValue(1)); 6200 break; 6201 } 6202 } 6203 } 6204 6205 // A structure to hold one of the bit-manipulation patterns below. Together, a 6206 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source: 6207 // (or (and (shl x, 1), 0xAAAAAAAA), 6208 // (and (srl x, 1), 0x55555555)) 6209 struct RISCVBitmanipPat { 6210 SDValue Op; 6211 unsigned ShAmt; 6212 bool IsSHL; 6213 6214 bool formsPairWith(const RISCVBitmanipPat &Other) const { 6215 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL; 6216 } 6217 }; 6218 6219 // Matches patterns of the form 6220 // (and (shl x, C2), (C1 << C2)) 6221 // (and (srl x, C2), C1) 6222 // (shl (and x, C1), C2) 6223 // (srl (and x, (C1 << C2)), C2) 6224 // Where C2 is a power of 2 and C1 has at least that many leading zeroes. 6225 // The expected masks for each shift amount are specified in BitmanipMasks where 6226 // BitmanipMasks[log2(C2)] specifies the expected C1 value. 6227 // The max allowed shift amount is either XLen/2 or XLen/4 determined by whether 6228 // BitmanipMasks contains 6 or 5 entries assuming that the maximum possible 6229 // XLen is 64. 6230 static Optional<RISCVBitmanipPat> 6231 matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) { 6232 assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) && 6233 "Unexpected number of masks"); 6234 Optional<uint64_t> Mask; 6235 // Optionally consume a mask around the shift operation. 6236 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) { 6237 Mask = Op.getConstantOperandVal(1); 6238 Op = Op.getOperand(0); 6239 } 6240 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL) 6241 return None; 6242 bool IsSHL = Op.getOpcode() == ISD::SHL; 6243 6244 if (!isa<ConstantSDNode>(Op.getOperand(1))) 6245 return None; 6246 uint64_t ShAmt = Op.getConstantOperandVal(1); 6247 6248 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 6249 if (ShAmt >= Width || !isPowerOf2_64(ShAmt)) 6250 return None; 6251 // If we don't have enough masks for 64 bit, then we must be trying to 6252 // match SHFL so we're only allowed to shift 1/4 of the width. 6253 if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2)) 6254 return None; 6255 6256 SDValue Src = Op.getOperand(0); 6257 6258 // The expected mask is shifted left when the AND is found around SHL 6259 // patterns. 6260 // ((x >> 1) & 0x55555555) 6261 // ((x << 1) & 0xAAAAAAAA) 6262 bool SHLExpMask = IsSHL; 6263 6264 if (!Mask) { 6265 // Sometimes LLVM keeps the mask as an operand of the shift, typically when 6266 // the mask is all ones: consume that now. 6267 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) { 6268 Mask = Src.getConstantOperandVal(1); 6269 Src = Src.getOperand(0); 6270 // The expected mask is now in fact shifted left for SRL, so reverse the 6271 // decision. 6272 // ((x & 0xAAAAAAAA) >> 1) 6273 // ((x & 0x55555555) << 1) 6274 SHLExpMask = !SHLExpMask; 6275 } else { 6276 // Use a default shifted mask of all-ones if there's no AND, truncated 6277 // down to the expected width. This simplifies the logic later on. 6278 Mask = maskTrailingOnes<uint64_t>(Width); 6279 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt); 6280 } 6281 } 6282 6283 unsigned MaskIdx = Log2_32(ShAmt); 6284 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 6285 6286 if (SHLExpMask) 6287 ExpMask <<= ShAmt; 6288 6289 if (Mask != ExpMask) 6290 return None; 6291 6292 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL}; 6293 } 6294 6295 // Matches any of the following bit-manipulation patterns: 6296 // (and (shl x, 1), (0x55555555 << 1)) 6297 // (and (srl x, 1), 0x55555555) 6298 // (shl (and x, 0x55555555), 1) 6299 // (srl (and x, (0x55555555 << 1)), 1) 6300 // where the shift amount and mask may vary thus: 6301 // [1] = 0x55555555 / 0xAAAAAAAA 6302 // [2] = 0x33333333 / 0xCCCCCCCC 6303 // [4] = 0x0F0F0F0F / 0xF0F0F0F0 6304 // [8] = 0x00FF00FF / 0xFF00FF00 6305 // [16] = 0x0000FFFF / 0xFFFFFFFF 6306 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64) 6307 static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) { 6308 // These are the unshifted masks which we use to match bit-manipulation 6309 // patterns. They may be shifted left in certain circumstances. 6310 static const uint64_t BitmanipMasks[] = { 6311 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 6312 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL}; 6313 6314 return matchRISCVBitmanipPat(Op, BitmanipMasks); 6315 } 6316 6317 // Match the following pattern as a GREVI(W) operation 6318 // (or (BITMANIP_SHL x), (BITMANIP_SRL x)) 6319 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG, 6320 const RISCVSubtarget &Subtarget) { 6321 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 6322 EVT VT = Op.getValueType(); 6323 6324 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 6325 auto LHS = matchGREVIPat(Op.getOperand(0)); 6326 auto RHS = matchGREVIPat(Op.getOperand(1)); 6327 if (LHS && RHS && LHS->formsPairWith(*RHS)) { 6328 SDLoc DL(Op); 6329 return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op, 6330 DAG.getConstant(LHS->ShAmt, DL, VT)); 6331 } 6332 } 6333 return SDValue(); 6334 } 6335 6336 // Matches any the following pattern as a GORCI(W) operation 6337 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2 6338 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2 6339 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x)) 6340 // Note that with the variant of 3., 6341 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x) 6342 // the inner pattern will first be matched as GREVI and then the outer 6343 // pattern will be matched to GORC via the first rule above. 6344 // 4. (or (rotl/rotr x, bitwidth/2), x) 6345 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG, 6346 const RISCVSubtarget &Subtarget) { 6347 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 6348 EVT VT = Op.getValueType(); 6349 6350 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) { 6351 SDLoc DL(Op); 6352 SDValue Op0 = Op.getOperand(0); 6353 SDValue Op1 = Op.getOperand(1); 6354 6355 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) { 6356 if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X && 6357 isa<ConstantSDNode>(Reverse.getOperand(1)) && 6358 isPowerOf2_32(Reverse.getConstantOperandVal(1))) 6359 return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1)); 6360 // We can also form GORCI from ROTL/ROTR by half the bitwidth. 6361 if ((Reverse.getOpcode() == ISD::ROTL || 6362 Reverse.getOpcode() == ISD::ROTR) && 6363 Reverse.getOperand(0) == X && 6364 isa<ConstantSDNode>(Reverse.getOperand(1))) { 6365 uint64_t RotAmt = Reverse.getConstantOperandVal(1); 6366 if (RotAmt == (VT.getSizeInBits() / 2)) 6367 return DAG.getNode(RISCVISD::GORC, DL, VT, X, 6368 DAG.getConstant(RotAmt, DL, VT)); 6369 } 6370 return SDValue(); 6371 }; 6372 6373 // Check for either commutable permutation of (or (GREVI x, shamt), x) 6374 if (SDValue V = MatchOROfReverse(Op0, Op1)) 6375 return V; 6376 if (SDValue V = MatchOROfReverse(Op1, Op0)) 6377 return V; 6378 6379 // OR is commutable so canonicalize its OR operand to the left 6380 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR) 6381 std::swap(Op0, Op1); 6382 if (Op0.getOpcode() != ISD::OR) 6383 return SDValue(); 6384 SDValue OrOp0 = Op0.getOperand(0); 6385 SDValue OrOp1 = Op0.getOperand(1); 6386 auto LHS = matchGREVIPat(OrOp0); 6387 // OR is commutable so swap the operands and try again: x might have been 6388 // on the left 6389 if (!LHS) { 6390 std::swap(OrOp0, OrOp1); 6391 LHS = matchGREVIPat(OrOp0); 6392 } 6393 auto RHS = matchGREVIPat(Op1); 6394 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) { 6395 return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op, 6396 DAG.getConstant(LHS->ShAmt, DL, VT)); 6397 } 6398 } 6399 return SDValue(); 6400 } 6401 6402 // Matches any of the following bit-manipulation patterns: 6403 // (and (shl x, 1), (0x22222222 << 1)) 6404 // (and (srl x, 1), 0x22222222) 6405 // (shl (and x, 0x22222222), 1) 6406 // (srl (and x, (0x22222222 << 1)), 1) 6407 // where the shift amount and mask may vary thus: 6408 // [1] = 0x22222222 / 0x44444444 6409 // [2] = 0x0C0C0C0C / 0x3C3C3C3C 6410 // [4] = 0x00F000F0 / 0x0F000F00 6411 // [8] = 0x0000FF00 / 0x00FF0000 6412 // [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64) 6413 static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) { 6414 // These are the unshifted masks which we use to match bit-manipulation 6415 // patterns. They may be shifted left in certain circumstances. 6416 static const uint64_t BitmanipMasks[] = { 6417 0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL, 6418 0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL}; 6419 6420 return matchRISCVBitmanipPat(Op, BitmanipMasks); 6421 } 6422 6423 // Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x) 6424 static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG, 6425 const RISCVSubtarget &Subtarget) { 6426 assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson"); 6427 EVT VT = Op.getValueType(); 6428 6429 if (VT != MVT::i32 && VT != Subtarget.getXLenVT()) 6430 return SDValue(); 6431 6432 SDValue Op0 = Op.getOperand(0); 6433 SDValue Op1 = Op.getOperand(1); 6434 6435 // Or is commutable so canonicalize the second OR to the LHS. 6436 if (Op0.getOpcode() != ISD::OR) 6437 std::swap(Op0, Op1); 6438 if (Op0.getOpcode() != ISD::OR) 6439 return SDValue(); 6440 6441 // We found an inner OR, so our operands are the operands of the inner OR 6442 // and the other operand of the outer OR. 6443 SDValue A = Op0.getOperand(0); 6444 SDValue B = Op0.getOperand(1); 6445 SDValue C = Op1; 6446 6447 auto Match1 = matchSHFLPat(A); 6448 auto Match2 = matchSHFLPat(B); 6449 6450 // If neither matched, we failed. 6451 if (!Match1 && !Match2) 6452 return SDValue(); 6453 6454 // We had at least one match. if one failed, try the remaining C operand. 6455 if (!Match1) { 6456 std::swap(A, C); 6457 Match1 = matchSHFLPat(A); 6458 if (!Match1) 6459 return SDValue(); 6460 } else if (!Match2) { 6461 std::swap(B, C); 6462 Match2 = matchSHFLPat(B); 6463 if (!Match2) 6464 return SDValue(); 6465 } 6466 assert(Match1 && Match2); 6467 6468 // Make sure our matches pair up. 6469 if (!Match1->formsPairWith(*Match2)) 6470 return SDValue(); 6471 6472 // All the remains is to make sure C is an AND with the same input, that masks 6473 // out the bits that are being shuffled. 6474 if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) || 6475 C.getOperand(0) != Match1->Op) 6476 return SDValue(); 6477 6478 uint64_t Mask = C.getConstantOperandVal(1); 6479 6480 static const uint64_t BitmanipMasks[] = { 6481 0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL, 6482 0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL, 6483 }; 6484 6485 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32; 6486 unsigned MaskIdx = Log2_32(Match1->ShAmt); 6487 uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width); 6488 6489 if (Mask != ExpMask) 6490 return SDValue(); 6491 6492 SDLoc DL(Op); 6493 return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op, 6494 DAG.getConstant(Match1->ShAmt, DL, VT)); 6495 } 6496 6497 // Optimize (add (shl x, c0), (shl y, c1)) -> 6498 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. 6499 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, 6500 const RISCVSubtarget &Subtarget) { 6501 // Perform this optimization only in the zba extension. 6502 if (!Subtarget.hasStdExtZba()) 6503 return SDValue(); 6504 6505 // Skip for vector types and larger types. 6506 EVT VT = N->getValueType(0); 6507 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 6508 return SDValue(); 6509 6510 // The two operand nodes must be SHL and have no other use. 6511 SDValue N0 = N->getOperand(0); 6512 SDValue N1 = N->getOperand(1); 6513 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL || 6514 !N0->hasOneUse() || !N1->hasOneUse()) 6515 return SDValue(); 6516 6517 // Check c0 and c1. 6518 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 6519 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 6520 if (!N0C || !N1C) 6521 return SDValue(); 6522 int64_t C0 = N0C->getSExtValue(); 6523 int64_t C1 = N1C->getSExtValue(); 6524 if (C0 <= 0 || C1 <= 0) 6525 return SDValue(); 6526 6527 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable. 6528 int64_t Bits = std::min(C0, C1); 6529 int64_t Diff = std::abs(C0 - C1); 6530 if (Diff != 1 && Diff != 2 && Diff != 3) 6531 return SDValue(); 6532 6533 // Build nodes. 6534 SDLoc DL(N); 6535 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0); 6536 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0); 6537 SDValue NA0 = 6538 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT)); 6539 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS); 6540 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT)); 6541 } 6542 6543 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is 6544 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself. 6545 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does 6546 // not undo itself, but they are redundant. 6547 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) { 6548 SDValue Src = N->getOperand(0); 6549 6550 if (Src.getOpcode() != N->getOpcode()) 6551 return SDValue(); 6552 6553 if (!isa<ConstantSDNode>(N->getOperand(1)) || 6554 !isa<ConstantSDNode>(Src.getOperand(1))) 6555 return SDValue(); 6556 6557 unsigned ShAmt1 = N->getConstantOperandVal(1); 6558 unsigned ShAmt2 = Src.getConstantOperandVal(1); 6559 Src = Src.getOperand(0); 6560 6561 unsigned CombinedShAmt; 6562 if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW) 6563 CombinedShAmt = ShAmt1 | ShAmt2; 6564 else 6565 CombinedShAmt = ShAmt1 ^ ShAmt2; 6566 6567 if (CombinedShAmt == 0) 6568 return Src; 6569 6570 SDLoc DL(N); 6571 return DAG.getNode( 6572 N->getOpcode(), DL, N->getValueType(0), Src, 6573 DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType())); 6574 } 6575 6576 // Combine a constant select operand into its use: 6577 // 6578 // (and (select cond, -1, c), x) 6579 // -> (select cond, x, (and x, c)) [AllOnes=1] 6580 // (or (select cond, 0, c), x) 6581 // -> (select cond, x, (or x, c)) [AllOnes=0] 6582 // (xor (select cond, 0, c), x) 6583 // -> (select cond, x, (xor x, c)) [AllOnes=0] 6584 // (add (select cond, 0, c), x) 6585 // -> (select cond, x, (add x, c)) [AllOnes=0] 6586 // (sub x, (select cond, 0, c)) 6587 // -> (select cond, x, (sub x, c)) [AllOnes=0] 6588 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 6589 SelectionDAG &DAG, bool AllOnes) { 6590 EVT VT = N->getValueType(0); 6591 6592 // Skip vectors. 6593 if (VT.isVector()) 6594 return SDValue(); 6595 6596 if ((Slct.getOpcode() != ISD::SELECT && 6597 Slct.getOpcode() != RISCVISD::SELECT_CC) || 6598 !Slct.hasOneUse()) 6599 return SDValue(); 6600 6601 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) { 6602 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); 6603 }; 6604 6605 bool SwapSelectOps; 6606 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0; 6607 SDValue TrueVal = Slct.getOperand(1 + OpOffset); 6608 SDValue FalseVal = Slct.getOperand(2 + OpOffset); 6609 SDValue NonConstantVal; 6610 if (isZeroOrAllOnes(TrueVal, AllOnes)) { 6611 SwapSelectOps = false; 6612 NonConstantVal = FalseVal; 6613 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) { 6614 SwapSelectOps = true; 6615 NonConstantVal = TrueVal; 6616 } else 6617 return SDValue(); 6618 6619 // Slct is now know to be the desired identity constant when CC is true. 6620 TrueVal = OtherOp; 6621 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); 6622 // Unless SwapSelectOps says the condition should be false. 6623 if (SwapSelectOps) 6624 std::swap(TrueVal, FalseVal); 6625 6626 if (Slct.getOpcode() == RISCVISD::SELECT_CC) 6627 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT, 6628 {Slct.getOperand(0), Slct.getOperand(1), 6629 Slct.getOperand(2), TrueVal, FalseVal}); 6630 6631 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, 6632 {Slct.getOperand(0), TrueVal, FalseVal}); 6633 } 6634 6635 // Attempt combineSelectAndUse on each operand of a commutative operator N. 6636 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, 6637 bool AllOnes) { 6638 SDValue N0 = N->getOperand(0); 6639 SDValue N1 = N->getOperand(1); 6640 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes)) 6641 return Result; 6642 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes)) 6643 return Result; 6644 return SDValue(); 6645 } 6646 6647 // Transform (add (mul x, c0), c1) -> 6648 // (add (mul (add x, c1/c0), c0), c1%c0). 6649 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case 6650 // that should be excluded is when c0*(c1/c0) is simm12, which will lead 6651 // to an infinite loop in DAGCombine if transformed. 6652 // Or transform (add (mul x, c0), c1) -> 6653 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0), 6654 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner 6655 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will 6656 // lead to an infinite loop in DAGCombine if transformed. 6657 // Or transform (add (mul x, c0), c1) -> 6658 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0), 6659 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner 6660 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will 6661 // lead to an infinite loop in DAGCombine if transformed. 6662 // Or transform (add (mul x, c0), c1) -> 6663 // (mul (add x, c1/c0), c0). 6664 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. 6665 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, 6666 const RISCVSubtarget &Subtarget) { 6667 // Skip for vector types and larger types. 6668 EVT VT = N->getValueType(0); 6669 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen()) 6670 return SDValue(); 6671 // The first operand node must be a MUL and has no other use. 6672 SDValue N0 = N->getOperand(0); 6673 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL) 6674 return SDValue(); 6675 // Check if c0 and c1 match above conditions. 6676 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 6677 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6678 if (!N0C || !N1C) 6679 return SDValue(); 6680 int64_t C0 = N0C->getSExtValue(); 6681 int64_t C1 = N1C->getSExtValue(); 6682 int64_t CA, CB; 6683 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) 6684 return SDValue(); 6685 // Search for proper CA (non-zero) and CB that both are simm12. 6686 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && 6687 !isInt<12>(C0 * (C1 / C0))) { 6688 CA = C1 / C0; 6689 CB = C1 % C0; 6690 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && 6691 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { 6692 CA = C1 / C0 + 1; 6693 CB = C1 % C0 - C0; 6694 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && 6695 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { 6696 CA = C1 / C0 - 1; 6697 CB = C1 % C0 + C0; 6698 } else 6699 return SDValue(); 6700 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). 6701 SDLoc DL(N); 6702 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), 6703 DAG.getConstant(CA, DL, VT)); 6704 SDValue New1 = 6705 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); 6706 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); 6707 } 6708 6709 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, 6710 const RISCVSubtarget &Subtarget) { 6711 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) 6712 return V; 6713 if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) 6714 return V; 6715 // fold (add (select lhs, rhs, cc, 0, y), x) -> 6716 // (select lhs, rhs, cc, x, (add x, y)) 6717 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); 6718 } 6719 6720 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) { 6721 // fold (sub x, (select lhs, rhs, cc, 0, y)) -> 6722 // (select lhs, rhs, cc, x, (sub x, y)) 6723 SDValue N0 = N->getOperand(0); 6724 SDValue N1 = N->getOperand(1); 6725 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false); 6726 } 6727 6728 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) { 6729 // fold (and (select lhs, rhs, cc, -1, y), x) -> 6730 // (select lhs, rhs, cc, x, (and x, y)) 6731 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true); 6732 } 6733 6734 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 6735 const RISCVSubtarget &Subtarget) { 6736 if (Subtarget.hasStdExtZbp()) { 6737 if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget)) 6738 return GREV; 6739 if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget)) 6740 return GORC; 6741 if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget)) 6742 return SHFL; 6743 } 6744 6745 // fold (or (select cond, 0, y), x) -> 6746 // (select cond, x, (or x, y)) 6747 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); 6748 } 6749 6750 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) { 6751 // fold (xor (select cond, 0, y), x) -> 6752 // (select cond, x, (xor x, y)) 6753 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); 6754 } 6755 6756 // Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND 6757 // has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free 6758 // by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be 6759 // removed during type legalization leaving an ADD/SUB/MUL use that won't use 6760 // ADDW/SUBW/MULW. 6761 static SDValue performANY_EXTENDCombine(SDNode *N, 6762 TargetLowering::DAGCombinerInfo &DCI, 6763 const RISCVSubtarget &Subtarget) { 6764 if (!Subtarget.is64Bit()) 6765 return SDValue(); 6766 6767 SelectionDAG &DAG = DCI.DAG; 6768 6769 SDValue Src = N->getOperand(0); 6770 EVT VT = N->getValueType(0); 6771 if (VT != MVT::i64 || Src.getValueType() != MVT::i32) 6772 return SDValue(); 6773 6774 // The opcode must be one that can implicitly sign_extend. 6775 // FIXME: Additional opcodes. 6776 switch (Src.getOpcode()) { 6777 default: 6778 return SDValue(); 6779 case ISD::MUL: 6780 if (!Subtarget.hasStdExtM()) 6781 return SDValue(); 6782 LLVM_FALLTHROUGH; 6783 case ISD::ADD: 6784 case ISD::SUB: 6785 break; 6786 } 6787 6788 // Only handle cases where the result is used by a CopyToReg. That likely 6789 // means the value is a liveout of the basic block. This helps prevent 6790 // infinite combine loops like PR51206. 6791 if (none_of(N->uses(), 6792 [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; })) 6793 return SDValue(); 6794 6795 SmallVector<SDNode *, 4> SetCCs; 6796 for (SDNode::use_iterator UI = Src.getNode()->use_begin(), 6797 UE = Src.getNode()->use_end(); 6798 UI != UE; ++UI) { 6799 SDNode *User = *UI; 6800 if (User == N) 6801 continue; 6802 if (UI.getUse().getResNo() != Src.getResNo()) 6803 continue; 6804 // All i32 setccs are legalized by sign extending operands. 6805 if (User->getOpcode() == ISD::SETCC) { 6806 SetCCs.push_back(User); 6807 continue; 6808 } 6809 // We don't know if we can extend this user. 6810 break; 6811 } 6812 6813 // If we don't have any SetCCs, this isn't worthwhile. 6814 if (SetCCs.empty()) 6815 return SDValue(); 6816 6817 SDLoc DL(N); 6818 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src); 6819 DCI.CombineTo(N, SExt); 6820 6821 // Promote all the setccs. 6822 for (SDNode *SetCC : SetCCs) { 6823 SmallVector<SDValue, 4> Ops; 6824 6825 for (unsigned j = 0; j != 2; ++j) { 6826 SDValue SOp = SetCC->getOperand(j); 6827 if (SOp == Src) 6828 Ops.push_back(SExt); 6829 else 6830 Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp)); 6831 } 6832 6833 Ops.push_back(SetCC->getOperand(2)); 6834 DCI.CombineTo(SetCC, 6835 DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); 6836 } 6837 return SDValue(N, 0); 6838 } 6839 6840 // Try to form VWMUL or VWMULU. 6841 // FIXME: Support VWMULSU. 6842 static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1, 6843 SelectionDAG &DAG) { 6844 assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode"); 6845 bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL; 6846 bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL; 6847 if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse()) 6848 return SDValue(); 6849 6850 SDValue Mask = N->getOperand(2); 6851 SDValue VL = N->getOperand(3); 6852 6853 // Make sure the mask and VL match. 6854 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL) 6855 return SDValue(); 6856 6857 MVT VT = N->getSimpleValueType(0); 6858 6859 // Determine the narrow size for a widening multiply. 6860 unsigned NarrowSize = VT.getScalarSizeInBits() / 2; 6861 MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize), 6862 VT.getVectorElementCount()); 6863 6864 SDLoc DL(N); 6865 6866 // See if the other operand is the same opcode. 6867 if (Op0.getOpcode() == Op1.getOpcode()) { 6868 if (!Op1.hasOneUse()) 6869 return SDValue(); 6870 6871 // Make sure the mask and VL match. 6872 if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL) 6873 return SDValue(); 6874 6875 Op1 = Op1.getOperand(0); 6876 } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) { 6877 // The operand is a splat of a scalar. 6878 6879 // The VL must be the same. 6880 if (Op1.getOperand(1) != VL) 6881 return SDValue(); 6882 6883 // Get the scalar value. 6884 Op1 = Op1.getOperand(0); 6885 6886 // See if have enough sign bits or zero bits in the scalar to use a 6887 // widening multiply by splatting to smaller element size. 6888 unsigned EltBits = VT.getScalarSizeInBits(); 6889 unsigned ScalarBits = Op1.getValueSizeInBits(); 6890 // Make sure we're getting all element bits from the scalar register. 6891 // FIXME: Support implicit sign extension of vmv.v.x? 6892 if (ScalarBits < EltBits) 6893 return SDValue(); 6894 6895 if (IsSignExt) { 6896 if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize)) 6897 return SDValue(); 6898 } else { 6899 APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize); 6900 if (!DAG.MaskedValueIsZero(Op1, Mask)) 6901 return SDValue(); 6902 } 6903 6904 Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL); 6905 } else 6906 return SDValue(); 6907 6908 Op0 = Op0.getOperand(0); 6909 6910 // Re-introduce narrower extends if needed. 6911 unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL; 6912 if (Op0.getValueType() != NarrowVT) 6913 Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL); 6914 if (Op1.getValueType() != NarrowVT) 6915 Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL); 6916 6917 unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; 6918 return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL); 6919 } 6920 6921 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 6922 DAGCombinerInfo &DCI) const { 6923 SelectionDAG &DAG = DCI.DAG; 6924 6925 // Helper to call SimplifyDemandedBits on an operand of N where only some low 6926 // bits are demanded. N will be added to the Worklist if it was not deleted. 6927 // Caller should return SDValue(N, 0) if this returns true. 6928 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) { 6929 SDValue Op = N->getOperand(OpNo); 6930 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits); 6931 if (!SimplifyDemandedBits(Op, Mask, DCI)) 6932 return false; 6933 6934 if (N->getOpcode() != ISD::DELETED_NODE) 6935 DCI.AddToWorklist(N); 6936 return true; 6937 }; 6938 6939 switch (N->getOpcode()) { 6940 default: 6941 break; 6942 case RISCVISD::SplitF64: { 6943 SDValue Op0 = N->getOperand(0); 6944 // If the input to SplitF64 is just BuildPairF64 then the operation is 6945 // redundant. Instead, use BuildPairF64's operands directly. 6946 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 6947 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 6948 6949 SDLoc DL(N); 6950 6951 // It's cheaper to materialise two 32-bit integers than to load a double 6952 // from the constant pool and transfer it to integer registers through the 6953 // stack. 6954 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { 6955 APInt V = C->getValueAPF().bitcastToAPInt(); 6956 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); 6957 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); 6958 return DCI.CombineTo(N, Lo, Hi); 6959 } 6960 6961 // This is a target-specific version of a DAGCombine performed in 6962 // DAGCombiner::visitBITCAST. It performs the equivalent of: 6963 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 6964 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 6965 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 6966 !Op0.getNode()->hasOneUse()) 6967 break; 6968 SDValue NewSplitF64 = 6969 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), 6970 Op0.getOperand(0)); 6971 SDValue Lo = NewSplitF64.getValue(0); 6972 SDValue Hi = NewSplitF64.getValue(1); 6973 APInt SignBit = APInt::getSignMask(32); 6974 if (Op0.getOpcode() == ISD::FNEG) { 6975 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, 6976 DAG.getConstant(SignBit, DL, MVT::i32)); 6977 return DCI.CombineTo(N, Lo, NewHi); 6978 } 6979 assert(Op0.getOpcode() == ISD::FABS); 6980 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, 6981 DAG.getConstant(~SignBit, DL, MVT::i32)); 6982 return DCI.CombineTo(N, Lo, NewHi); 6983 } 6984 case RISCVISD::SLLW: 6985 case RISCVISD::SRAW: 6986 case RISCVISD::SRLW: 6987 case RISCVISD::ROLW: 6988 case RISCVISD::RORW: { 6989 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 6990 if (SimplifyDemandedLowBitsHelper(0, 32) || 6991 SimplifyDemandedLowBitsHelper(1, 5)) 6992 return SDValue(N, 0); 6993 break; 6994 } 6995 case RISCVISD::CLZW: 6996 case RISCVISD::CTZW: { 6997 // Only the lower 32 bits of the first operand are read 6998 if (SimplifyDemandedLowBitsHelper(0, 32)) 6999 return SDValue(N, 0); 7000 break; 7001 } 7002 case RISCVISD::FSL: 7003 case RISCVISD::FSR: { 7004 // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read. 7005 unsigned BitWidth = N->getOperand(2).getValueSizeInBits(); 7006 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 7007 if (SimplifyDemandedLowBitsHelper(2, Log2_32(BitWidth) + 1)) 7008 return SDValue(N, 0); 7009 break; 7010 } 7011 case RISCVISD::FSLW: 7012 case RISCVISD::FSRW: { 7013 // Only the lower 32 bits of Values and lower 6 bits of shift amount are 7014 // read. 7015 if (SimplifyDemandedLowBitsHelper(0, 32) || 7016 SimplifyDemandedLowBitsHelper(1, 32) || 7017 SimplifyDemandedLowBitsHelper(2, 6)) 7018 return SDValue(N, 0); 7019 break; 7020 } 7021 case RISCVISD::GREV: 7022 case RISCVISD::GORC: { 7023 // Only the lower log2(Bitwidth) bits of the the shift amount are read. 7024 unsigned BitWidth = N->getOperand(1).getValueSizeInBits(); 7025 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 7026 if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth))) 7027 return SDValue(N, 0); 7028 7029 return combineGREVI_GORCI(N, DCI.DAG); 7030 } 7031 case RISCVISD::GREVW: 7032 case RISCVISD::GORCW: { 7033 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. 7034 if (SimplifyDemandedLowBitsHelper(0, 32) || 7035 SimplifyDemandedLowBitsHelper(1, 5)) 7036 return SDValue(N, 0); 7037 7038 return combineGREVI_GORCI(N, DCI.DAG); 7039 } 7040 case RISCVISD::SHFL: 7041 case RISCVISD::UNSHFL: { 7042 // Only the lower log2(Bitwidth)-1 bits of the the shift amount are read. 7043 unsigned BitWidth = N->getOperand(1).getValueSizeInBits(); 7044 assert(isPowerOf2_32(BitWidth) && "Unexpected bit width"); 7045 if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1)) 7046 return SDValue(N, 0); 7047 7048 break; 7049 } 7050 case RISCVISD::SHFLW: 7051 case RISCVISD::UNSHFLW: { 7052 // Only the lower 32 bits of LHS and lower 4 bits of RHS are read. 7053 SDValue LHS = N->getOperand(0); 7054 SDValue RHS = N->getOperand(1); 7055 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); 7056 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4); 7057 if (SimplifyDemandedLowBitsHelper(0, 32) || 7058 SimplifyDemandedLowBitsHelper(1, 4)) 7059 return SDValue(N, 0); 7060 7061 break; 7062 } 7063 case RISCVISD::BCOMPRESSW: 7064 case RISCVISD::BDECOMPRESSW: { 7065 // Only the lower 32 bits of LHS and RHS are read. 7066 if (SimplifyDemandedLowBitsHelper(0, 32) || 7067 SimplifyDemandedLowBitsHelper(1, 32)) 7068 return SDValue(N, 0); 7069 7070 break; 7071 } 7072 case RISCVISD::FMV_X_ANYEXTH: 7073 case RISCVISD::FMV_X_ANYEXTW_RV64: { 7074 SDLoc DL(N); 7075 SDValue Op0 = N->getOperand(0); 7076 MVT VT = N->getSimpleValueType(0); 7077 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the 7078 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 7079 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. 7080 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 && 7081 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) || 7082 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH && 7083 Op0->getOpcode() == RISCVISD::FMV_H_X)) { 7084 assert(Op0.getOperand(0).getValueType() == VT && 7085 "Unexpected value type!"); 7086 return Op0.getOperand(0); 7087 } 7088 7089 // This is a target-specific version of a DAGCombine performed in 7090 // DAGCombiner::visitBITCAST. It performs the equivalent of: 7091 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 7092 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 7093 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 7094 !Op0.getNode()->hasOneUse()) 7095 break; 7096 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0)); 7097 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16; 7098 APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits()); 7099 if (Op0.getOpcode() == ISD::FNEG) 7100 return DAG.getNode(ISD::XOR, DL, VT, NewFMV, 7101 DAG.getConstant(SignBit, DL, VT)); 7102 7103 assert(Op0.getOpcode() == ISD::FABS); 7104 return DAG.getNode(ISD::AND, DL, VT, NewFMV, 7105 DAG.getConstant(~SignBit, DL, VT)); 7106 } 7107 case ISD::ADD: 7108 return performADDCombine(N, DAG, Subtarget); 7109 case ISD::SUB: 7110 return performSUBCombine(N, DAG); 7111 case ISD::AND: 7112 return performANDCombine(N, DAG); 7113 case ISD::OR: 7114 return performORCombine(N, DAG, Subtarget); 7115 case ISD::XOR: 7116 return performXORCombine(N, DAG); 7117 case ISD::ANY_EXTEND: 7118 return performANY_EXTENDCombine(N, DCI, Subtarget); 7119 case ISD::ZERO_EXTEND: 7120 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during 7121 // type legalization. This is safe because fp_to_uint produces poison if 7122 // it overflows. 7123 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() && 7124 N->getOperand(0).getOpcode() == ISD::FP_TO_UINT && 7125 isTypeLegal(N->getOperand(0).getOperand(0).getValueType())) 7126 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, 7127 N->getOperand(0).getOperand(0)); 7128 return SDValue(); 7129 case RISCVISD::SELECT_CC: { 7130 // Transform 7131 SDValue LHS = N->getOperand(0); 7132 SDValue RHS = N->getOperand(1); 7133 SDValue TrueV = N->getOperand(3); 7134 SDValue FalseV = N->getOperand(4); 7135 7136 // If the True and False values are the same, we don't need a select_cc. 7137 if (TrueV == FalseV) 7138 return TrueV; 7139 7140 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); 7141 if (!ISD::isIntEqualitySetCC(CCVal)) 7142 break; 7143 7144 // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) -> 7145 // (select_cc X, Y, lt, trueV, falseV) 7146 // Sometimes the setcc is introduced after select_cc has been formed. 7147 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 7148 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 7149 // If we're looking for eq 0 instead of ne 0, we need to invert the 7150 // condition. 7151 bool Invert = CCVal == ISD::SETEQ; 7152 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 7153 if (Invert) 7154 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 7155 7156 SDLoc DL(N); 7157 RHS = LHS.getOperand(1); 7158 LHS = LHS.getOperand(0); 7159 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 7160 7161 SDValue TargetCC = DAG.getCondCode(CCVal); 7162 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 7163 {LHS, RHS, TargetCC, TrueV, FalseV}); 7164 } 7165 7166 // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) -> 7167 // (select_cc X, Y, eq/ne, trueV, falseV) 7168 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) 7169 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0), 7170 {LHS.getOperand(0), LHS.getOperand(1), 7171 N->getOperand(2), TrueV, FalseV}); 7172 // (select_cc X, 1, setne, trueV, falseV) -> 7173 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. 7174 // This can occur when legalizing some floating point comparisons. 7175 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 7176 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 7177 SDLoc DL(N); 7178 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 7179 SDValue TargetCC = DAG.getCondCode(CCVal); 7180 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 7181 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), 7182 {LHS, RHS, TargetCC, TrueV, FalseV}); 7183 } 7184 7185 break; 7186 } 7187 case RISCVISD::BR_CC: { 7188 SDValue LHS = N->getOperand(1); 7189 SDValue RHS = N->getOperand(2); 7190 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get(); 7191 if (!ISD::isIntEqualitySetCC(CCVal)) 7192 break; 7193 7194 // Fold (br_cc (setlt X, Y), 0, ne, dest) -> 7195 // (br_cc X, Y, lt, dest) 7196 // Sometimes the setcc is introduced after br_cc has been formed. 7197 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && 7198 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { 7199 // If we're looking for eq 0 instead of ne 0, we need to invert the 7200 // condition. 7201 bool Invert = CCVal == ISD::SETEQ; 7202 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); 7203 if (Invert) 7204 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 7205 7206 SDLoc DL(N); 7207 RHS = LHS.getOperand(1); 7208 LHS = LHS.getOperand(0); 7209 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); 7210 7211 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 7212 N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal), 7213 N->getOperand(4)); 7214 } 7215 7216 // Fold (br_cc (xor X, Y), 0, eq/ne, dest) -> 7217 // (br_cc X, Y, eq/ne, trueV, falseV) 7218 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) 7219 return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0), 7220 N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1), 7221 N->getOperand(3), N->getOperand(4)); 7222 7223 // (br_cc X, 1, setne, br_cc) -> 7224 // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1. 7225 // This can occur when legalizing some floating point comparisons. 7226 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); 7227 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { 7228 SDLoc DL(N); 7229 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); 7230 SDValue TargetCC = DAG.getCondCode(CCVal); 7231 RHS = DAG.getConstant(0, DL, LHS.getValueType()); 7232 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), 7233 N->getOperand(0), LHS, RHS, TargetCC, 7234 N->getOperand(4)); 7235 } 7236 break; 7237 } 7238 case ISD::FCOPYSIGN: { 7239 EVT VT = N->getValueType(0); 7240 if (!VT.isVector()) 7241 break; 7242 // There is a form of VFSGNJ which injects the negated sign of its second 7243 // operand. Try and bubble any FNEG up after the extend/round to produce 7244 // this optimized pattern. Avoid modifying cases where FP_ROUND and 7245 // TRUNC=1. 7246 SDValue In2 = N->getOperand(1); 7247 // Avoid cases where the extend/round has multiple uses, as duplicating 7248 // those is typically more expensive than removing a fneg. 7249 if (!In2.hasOneUse()) 7250 break; 7251 if (In2.getOpcode() != ISD::FP_EXTEND && 7252 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0)) 7253 break; 7254 In2 = In2.getOperand(0); 7255 if (In2.getOpcode() != ISD::FNEG) 7256 break; 7257 SDLoc DL(N); 7258 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT); 7259 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0), 7260 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound)); 7261 } 7262 case ISD::MGATHER: 7263 case ISD::MSCATTER: 7264 case ISD::VP_GATHER: 7265 case ISD::VP_SCATTER: { 7266 if (!DCI.isBeforeLegalize()) 7267 break; 7268 SDValue Index, ScaleOp; 7269 bool IsIndexScaled = false; 7270 bool IsIndexSigned = false; 7271 if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) { 7272 Index = VPGSN->getIndex(); 7273 ScaleOp = VPGSN->getScale(); 7274 IsIndexScaled = VPGSN->isIndexScaled(); 7275 IsIndexSigned = VPGSN->isIndexSigned(); 7276 } else { 7277 const auto *MGSN = cast<MaskedGatherScatterSDNode>(N); 7278 Index = MGSN->getIndex(); 7279 ScaleOp = MGSN->getScale(); 7280 IsIndexScaled = MGSN->isIndexScaled(); 7281 IsIndexSigned = MGSN->isIndexSigned(); 7282 } 7283 EVT IndexVT = Index.getValueType(); 7284 MVT XLenVT = Subtarget.getXLenVT(); 7285 // RISCV indexed loads only support the "unsigned unscaled" addressing 7286 // mode, so anything else must be manually legalized. 7287 bool NeedsIdxLegalization = 7288 IsIndexScaled || 7289 (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT)); 7290 if (!NeedsIdxLegalization) 7291 break; 7292 7293 SDLoc DL(N); 7294 7295 // Any index legalization should first promote to XLenVT, so we don't lose 7296 // bits when scaling. This may create an illegal index type so we let 7297 // LLVM's legalization take care of the splitting. 7298 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 7299 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { 7300 IndexVT = IndexVT.changeVectorElementType(XLenVT); 7301 Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, 7302 DL, IndexVT, Index); 7303 } 7304 7305 unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue(); 7306 if (IsIndexScaled && Scale != 1) { 7307 // Manually scale the indices by the element size. 7308 // TODO: Sanitize the scale operand here? 7309 // TODO: For VP nodes, should we use VP_SHL here? 7310 assert(isPowerOf2_32(Scale) && "Expecting power-of-two types"); 7311 SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT); 7312 Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale); 7313 } 7314 7315 ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED; 7316 if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N)) 7317 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, 7318 {VPGN->getChain(), VPGN->getBasePtr(), Index, 7319 VPGN->getScale(), VPGN->getMask(), 7320 VPGN->getVectorLength()}, 7321 VPGN->getMemOperand(), NewIndexTy); 7322 if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N)) 7323 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, 7324 {VPSN->getChain(), VPSN->getValue(), 7325 VPSN->getBasePtr(), Index, VPSN->getScale(), 7326 VPSN->getMask(), VPSN->getVectorLength()}, 7327 VPSN->getMemOperand(), NewIndexTy); 7328 if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) 7329 return DAG.getMaskedGather( 7330 N->getVTList(), MGN->getMemoryVT(), DL, 7331 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), 7332 MGN->getBasePtr(), Index, MGN->getScale()}, 7333 MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType()); 7334 const auto *MSN = cast<MaskedScatterSDNode>(N); 7335 return DAG.getMaskedScatter( 7336 N->getVTList(), MSN->getMemoryVT(), DL, 7337 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), 7338 Index, MSN->getScale()}, 7339 MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore()); 7340 } 7341 case RISCVISD::SRA_VL: 7342 case RISCVISD::SRL_VL: 7343 case RISCVISD::SHL_VL: { 7344 SDValue ShAmt = N->getOperand(1); 7345 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 7346 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 7347 SDLoc DL(N); 7348 SDValue VL = N->getOperand(3); 7349 EVT VT = N->getValueType(0); 7350 ShAmt = 7351 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL); 7352 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt, 7353 N->getOperand(2), N->getOperand(3)); 7354 } 7355 break; 7356 } 7357 case ISD::SRA: 7358 case ISD::SRL: 7359 case ISD::SHL: { 7360 SDValue ShAmt = N->getOperand(1); 7361 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) { 7362 // We don't need the upper 32 bits of a 64-bit element for a shift amount. 7363 SDLoc DL(N); 7364 EVT VT = N->getValueType(0); 7365 ShAmt = 7366 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0)); 7367 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt); 7368 } 7369 break; 7370 } 7371 case RISCVISD::MUL_VL: { 7372 SDValue Op0 = N->getOperand(0); 7373 SDValue Op1 = N->getOperand(1); 7374 if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG)) 7375 return V; 7376 if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG)) 7377 return V; 7378 return SDValue(); 7379 } 7380 case ISD::STORE: { 7381 auto *Store = cast<StoreSDNode>(N); 7382 SDValue Val = Store->getValue(); 7383 // Combine store of vmv.x.s to vse with VL of 1. 7384 // FIXME: Support FP. 7385 if (Val.getOpcode() == RISCVISD::VMV_X_S) { 7386 SDValue Src = Val.getOperand(0); 7387 EVT VecVT = Src.getValueType(); 7388 EVT MemVT = Store->getMemoryVT(); 7389 // The memory VT and the element type must match. 7390 if (VecVT.getVectorElementType() == MemVT) { 7391 SDLoc DL(N); 7392 MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount()); 7393 return DAG.getStoreVP(Store->getChain(), DL, Src, Store->getBasePtr(), 7394 DAG.getConstant(1, DL, MaskVT), 7395 DAG.getConstant(1, DL, Subtarget.getXLenVT()), 7396 Store->getPointerInfo(), 7397 Store->getOriginalAlign(), 7398 Store->getMemOperand()->getFlags()); 7399 } 7400 } 7401 7402 break; 7403 } 7404 } 7405 7406 return SDValue(); 7407 } 7408 7409 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 7410 const SDNode *N, CombineLevel Level) const { 7411 // The following folds are only desirable if `(OP _, c1 << c2)` can be 7412 // materialised in fewer instructions than `(OP _, c1)`: 7413 // 7414 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 7415 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 7416 SDValue N0 = N->getOperand(0); 7417 EVT Ty = N0.getValueType(); 7418 if (Ty.isScalarInteger() && 7419 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 7420 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 7421 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7422 if (C1 && C2) { 7423 const APInt &C1Int = C1->getAPIntValue(); 7424 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 7425 7426 // We can materialise `c1 << c2` into an add immediate, so it's "free", 7427 // and the combine should happen, to potentially allow further combines 7428 // later. 7429 if (ShiftedC1Int.getMinSignedBits() <= 64 && 7430 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 7431 return true; 7432 7433 // We can materialise `c1` in an add immediate, so it's "free", and the 7434 // combine should be prevented. 7435 if (C1Int.getMinSignedBits() <= 64 && 7436 isLegalAddImmediate(C1Int.getSExtValue())) 7437 return false; 7438 7439 // Neither constant will fit into an immediate, so find materialisation 7440 // costs. 7441 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 7442 Subtarget.getFeatureBits(), 7443 /*CompressionCost*/true); 7444 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 7445 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(), 7446 /*CompressionCost*/true); 7447 7448 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 7449 // combine should be prevented. 7450 if (C1Cost < ShiftedC1Cost) 7451 return false; 7452 } 7453 } 7454 return true; 7455 } 7456 7457 bool RISCVTargetLowering::targetShrinkDemandedConstant( 7458 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 7459 TargetLoweringOpt &TLO) const { 7460 // Delay this optimization as late as possible. 7461 if (!TLO.LegalOps) 7462 return false; 7463 7464 EVT VT = Op.getValueType(); 7465 if (VT.isVector()) 7466 return false; 7467 7468 // Only handle AND for now. 7469 if (Op.getOpcode() != ISD::AND) 7470 return false; 7471 7472 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 7473 if (!C) 7474 return false; 7475 7476 const APInt &Mask = C->getAPIntValue(); 7477 7478 // Clear all non-demanded bits initially. 7479 APInt ShrunkMask = Mask & DemandedBits; 7480 7481 // Try to make a smaller immediate by setting undemanded bits. 7482 7483 APInt ExpandedMask = Mask | ~DemandedBits; 7484 7485 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { 7486 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); 7487 }; 7488 auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool { 7489 if (NewMask == Mask) 7490 return true; 7491 SDLoc DL(Op); 7492 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); 7493 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); 7494 return TLO.CombineTo(Op, NewOp); 7495 }; 7496 7497 // If the shrunk mask fits in sign extended 12 bits, let the target 7498 // independent code apply it. 7499 if (ShrunkMask.isSignedIntN(12)) 7500 return false; 7501 7502 // Preserve (and X, 0xffff) when zext.h is supported. 7503 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { 7504 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); 7505 if (IsLegalMask(NewMask)) 7506 return UseMask(NewMask); 7507 } 7508 7509 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 7510 if (VT == MVT::i64) { 7511 APInt NewMask = APInt(64, 0xffffffff); 7512 if (IsLegalMask(NewMask)) 7513 return UseMask(NewMask); 7514 } 7515 7516 // For the remaining optimizations, we need to be able to make a negative 7517 // number through a combination of mask and undemanded bits. 7518 if (!ExpandedMask.isNegative()) 7519 return false; 7520 7521 // What is the fewest number of bits we need to represent the negative number. 7522 unsigned MinSignedBits = ExpandedMask.getMinSignedBits(); 7523 7524 // Try to make a 12 bit negative immediate. If that fails try to make a 32 7525 // bit negative immediate unless the shrunk immediate already fits in 32 bits. 7526 APInt NewMask = ShrunkMask; 7527 if (MinSignedBits <= 12) 7528 NewMask.setBitsFrom(11); 7529 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) 7530 NewMask.setBitsFrom(31); 7531 else 7532 return false; 7533 7534 // Check that our new mask is a subset of the demanded mask. 7535 assert(IsLegalMask(NewMask)); 7536 return UseMask(NewMask); 7537 } 7538 7539 static void computeGREV(APInt &Src, unsigned ShAmt) { 7540 ShAmt &= Src.getBitWidth() - 1; 7541 uint64_t x = Src.getZExtValue(); 7542 if (ShAmt & 1) 7543 x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); 7544 if (ShAmt & 2) 7545 x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); 7546 if (ShAmt & 4) 7547 x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); 7548 if (ShAmt & 8) 7549 x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); 7550 if (ShAmt & 16) 7551 x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); 7552 if (ShAmt & 32) 7553 x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); 7554 Src = x; 7555 } 7556 7557 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 7558 KnownBits &Known, 7559 const APInt &DemandedElts, 7560 const SelectionDAG &DAG, 7561 unsigned Depth) const { 7562 unsigned BitWidth = Known.getBitWidth(); 7563 unsigned Opc = Op.getOpcode(); 7564 assert((Opc >= ISD::BUILTIN_OP_END || 7565 Opc == ISD::INTRINSIC_WO_CHAIN || 7566 Opc == ISD::INTRINSIC_W_CHAIN || 7567 Opc == ISD::INTRINSIC_VOID) && 7568 "Should use MaskedValueIsZero if you don't know whether Op" 7569 " is a target node!"); 7570 7571 Known.resetAll(); 7572 switch (Opc) { 7573 default: break; 7574 case RISCVISD::SELECT_CC: { 7575 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1); 7576 // If we don't know any bits, early out. 7577 if (Known.isUnknown()) 7578 break; 7579 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1); 7580 7581 // Only known if known in both the LHS and RHS. 7582 Known = KnownBits::commonBits(Known, Known2); 7583 break; 7584 } 7585 case RISCVISD::REMUW: { 7586 KnownBits Known2; 7587 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 7588 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 7589 // We only care about the lower 32 bits. 7590 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32)); 7591 // Restore the original width by sign extending. 7592 Known = Known.sext(BitWidth); 7593 break; 7594 } 7595 case RISCVISD::DIVUW: { 7596 KnownBits Known2; 7597 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 7598 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 7599 // We only care about the lower 32 bits. 7600 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32)); 7601 // Restore the original width by sign extending. 7602 Known = Known.sext(BitWidth); 7603 break; 7604 } 7605 case RISCVISD::CTZW: { 7606 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 7607 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); 7608 unsigned LowBits = Log2_32(PossibleTZ) + 1; 7609 Known.Zero.setBitsFrom(LowBits); 7610 break; 7611 } 7612 case RISCVISD::CLZW: { 7613 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 7614 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros(); 7615 unsigned LowBits = Log2_32(PossibleLZ) + 1; 7616 Known.Zero.setBitsFrom(LowBits); 7617 break; 7618 } 7619 case RISCVISD::GREV: 7620 case RISCVISD::GREVW: { 7621 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 7622 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 7623 if (Opc == RISCVISD::GREVW) 7624 Known = Known.trunc(32); 7625 unsigned ShAmt = C->getZExtValue(); 7626 computeGREV(Known.Zero, ShAmt); 7627 computeGREV(Known.One, ShAmt); 7628 if (Opc == RISCVISD::GREVW) 7629 Known = Known.sext(BitWidth); 7630 } 7631 break; 7632 } 7633 case RISCVISD::READ_VLENB: 7634 // We assume VLENB is at least 16 bytes. 7635 Known.Zero.setLowBits(4); 7636 // We assume VLENB is no more than 65536 / 8 bytes. 7637 Known.Zero.setBitsFrom(14); 7638 break; 7639 case ISD::INTRINSIC_W_CHAIN: { 7640 unsigned IntNo = Op.getConstantOperandVal(1); 7641 switch (IntNo) { 7642 default: 7643 // We can't do anything for most intrinsics. 7644 break; 7645 case Intrinsic::riscv_vsetvli: 7646 case Intrinsic::riscv_vsetvlimax: 7647 // Assume that VL output is positive and would fit in an int32_t. 7648 // TODO: VLEN might be capped at 16 bits in a future V spec update. 7649 if (BitWidth >= 32) 7650 Known.Zero.setBitsFrom(31); 7651 break; 7652 } 7653 break; 7654 } 7655 } 7656 } 7657 7658 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 7659 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 7660 unsigned Depth) const { 7661 switch (Op.getOpcode()) { 7662 default: 7663 break; 7664 case RISCVISD::SELECT_CC: { 7665 unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1); 7666 if (Tmp == 1) return 1; // Early out. 7667 unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1); 7668 return std::min(Tmp, Tmp2); 7669 } 7670 case RISCVISD::SLLW: 7671 case RISCVISD::SRAW: 7672 case RISCVISD::SRLW: 7673 case RISCVISD::DIVW: 7674 case RISCVISD::DIVUW: 7675 case RISCVISD::REMUW: 7676 case RISCVISD::ROLW: 7677 case RISCVISD::RORW: 7678 case RISCVISD::GREVW: 7679 case RISCVISD::GORCW: 7680 case RISCVISD::FSLW: 7681 case RISCVISD::FSRW: 7682 case RISCVISD::SHFLW: 7683 case RISCVISD::UNSHFLW: 7684 case RISCVISD::BCOMPRESSW: 7685 case RISCVISD::BDECOMPRESSW: 7686 case RISCVISD::FCVT_W_RTZ_RV64: 7687 case RISCVISD::FCVT_WU_RTZ_RV64: 7688 // TODO: As the result is sign-extended, this is conservatively correct. A 7689 // more precise answer could be calculated for SRAW depending on known 7690 // bits in the shift amount. 7691 return 33; 7692 case RISCVISD::SHFL: 7693 case RISCVISD::UNSHFL: { 7694 // There is no SHFLIW, but a i64 SHFLI with bit 4 of the control word 7695 // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but 7696 // will stay within the upper 32 bits. If there were more than 32 sign bits 7697 // before there will be at least 33 sign bits after. 7698 if (Op.getValueType() == MVT::i64 && 7699 isa<ConstantSDNode>(Op.getOperand(1)) && 7700 (Op.getConstantOperandVal(1) & 0x10) == 0) { 7701 unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 7702 if (Tmp > 32) 7703 return 33; 7704 } 7705 break; 7706 } 7707 case RISCVISD::VMV_X_S: 7708 // The number of sign bits of the scalar result is computed by obtaining the 7709 // element type of the input vector operand, subtracting its width from the 7710 // XLEN, and then adding one (sign bit within the element type). If the 7711 // element type is wider than XLen, the least-significant XLEN bits are 7712 // taken. 7713 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen()) 7714 return 1; 7715 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1; 7716 } 7717 7718 return 1; 7719 } 7720 7721 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 7722 MachineBasicBlock *BB) { 7723 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 7724 7725 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 7726 // Should the count have wrapped while it was being read, we need to try 7727 // again. 7728 // ... 7729 // read: 7730 // rdcycleh x3 # load high word of cycle 7731 // rdcycle x2 # load low word of cycle 7732 // rdcycleh x4 # load high word of cycle 7733 // bne x3, x4, read # check if high word reads match, otherwise try again 7734 // ... 7735 7736 MachineFunction &MF = *BB->getParent(); 7737 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7738 MachineFunction::iterator It = ++BB->getIterator(); 7739 7740 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 7741 MF.insert(It, LoopMBB); 7742 7743 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 7744 MF.insert(It, DoneMBB); 7745 7746 // Transfer the remainder of BB and its successor edges to DoneMBB. 7747 DoneMBB->splice(DoneMBB->begin(), BB, 7748 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7749 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 7750 7751 BB->addSuccessor(LoopMBB); 7752 7753 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 7754 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 7755 Register LoReg = MI.getOperand(0).getReg(); 7756 Register HiReg = MI.getOperand(1).getReg(); 7757 DebugLoc DL = MI.getDebugLoc(); 7758 7759 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 7760 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 7761 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 7762 .addReg(RISCV::X0); 7763 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 7764 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 7765 .addReg(RISCV::X0); 7766 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 7767 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 7768 .addReg(RISCV::X0); 7769 7770 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 7771 .addReg(HiReg) 7772 .addReg(ReadAgainReg) 7773 .addMBB(LoopMBB); 7774 7775 LoopMBB->addSuccessor(LoopMBB); 7776 LoopMBB->addSuccessor(DoneMBB); 7777 7778 MI.eraseFromParent(); 7779 7780 return DoneMBB; 7781 } 7782 7783 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 7784 MachineBasicBlock *BB) { 7785 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 7786 7787 MachineFunction &MF = *BB->getParent(); 7788 DebugLoc DL = MI.getDebugLoc(); 7789 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 7790 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 7791 Register LoReg = MI.getOperand(0).getReg(); 7792 Register HiReg = MI.getOperand(1).getReg(); 7793 Register SrcReg = MI.getOperand(2).getReg(); 7794 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 7795 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 7796 7797 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 7798 RI); 7799 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 7800 MachineMemOperand *MMOLo = 7801 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8)); 7802 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 7803 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8)); 7804 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 7805 .addFrameIndex(FI) 7806 .addImm(0) 7807 .addMemOperand(MMOLo); 7808 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 7809 .addFrameIndex(FI) 7810 .addImm(4) 7811 .addMemOperand(MMOHi); 7812 MI.eraseFromParent(); // The pseudo instruction is gone now. 7813 return BB; 7814 } 7815 7816 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 7817 MachineBasicBlock *BB) { 7818 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 7819 "Unexpected instruction"); 7820 7821 MachineFunction &MF = *BB->getParent(); 7822 DebugLoc DL = MI.getDebugLoc(); 7823 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 7824 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 7825 Register DstReg = MI.getOperand(0).getReg(); 7826 Register LoReg = MI.getOperand(1).getReg(); 7827 Register HiReg = MI.getOperand(2).getReg(); 7828 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 7829 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 7830 7831 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 7832 MachineMemOperand *MMOLo = 7833 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8)); 7834 MachineMemOperand *MMOHi = MF.getMachineMemOperand( 7835 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8)); 7836 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 7837 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 7838 .addFrameIndex(FI) 7839 .addImm(0) 7840 .addMemOperand(MMOLo); 7841 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 7842 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 7843 .addFrameIndex(FI) 7844 .addImm(4) 7845 .addMemOperand(MMOHi); 7846 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 7847 MI.eraseFromParent(); // The pseudo instruction is gone now. 7848 return BB; 7849 } 7850 7851 static bool isSelectPseudo(MachineInstr &MI) { 7852 switch (MI.getOpcode()) { 7853 default: 7854 return false; 7855 case RISCV::Select_GPR_Using_CC_GPR: 7856 case RISCV::Select_FPR16_Using_CC_GPR: 7857 case RISCV::Select_FPR32_Using_CC_GPR: 7858 case RISCV::Select_FPR64_Using_CC_GPR: 7859 return true; 7860 } 7861 } 7862 7863 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 7864 MachineBasicBlock *BB, 7865 const RISCVSubtarget &Subtarget) { 7866 // To "insert" Select_* instructions, we actually have to insert the triangle 7867 // control-flow pattern. The incoming instructions know the destination vreg 7868 // to set, the condition code register to branch on, the true/false values to 7869 // select between, and the condcode to use to select the appropriate branch. 7870 // 7871 // We produce the following control flow: 7872 // HeadMBB 7873 // | \ 7874 // | IfFalseMBB 7875 // | / 7876 // TailMBB 7877 // 7878 // When we find a sequence of selects we attempt to optimize their emission 7879 // by sharing the control flow. Currently we only handle cases where we have 7880 // multiple selects with the exact same condition (same LHS, RHS and CC). 7881 // The selects may be interleaved with other instructions if the other 7882 // instructions meet some requirements we deem safe: 7883 // - They are debug instructions. Otherwise, 7884 // - They do not have side-effects, do not access memory and their inputs do 7885 // not depend on the results of the select pseudo-instructions. 7886 // The TrueV/FalseV operands of the selects cannot depend on the result of 7887 // previous selects in the sequence. 7888 // These conditions could be further relaxed. See the X86 target for a 7889 // related approach and more information. 7890 Register LHS = MI.getOperand(1).getReg(); 7891 Register RHS = MI.getOperand(2).getReg(); 7892 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 7893 7894 SmallVector<MachineInstr *, 4> SelectDebugValues; 7895 SmallSet<Register, 4> SelectDests; 7896 SelectDests.insert(MI.getOperand(0).getReg()); 7897 7898 MachineInstr *LastSelectPseudo = &MI; 7899 7900 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 7901 SequenceMBBI != E; ++SequenceMBBI) { 7902 if (SequenceMBBI->isDebugInstr()) 7903 continue; 7904 else if (isSelectPseudo(*SequenceMBBI)) { 7905 if (SequenceMBBI->getOperand(1).getReg() != LHS || 7906 SequenceMBBI->getOperand(2).getReg() != RHS || 7907 SequenceMBBI->getOperand(3).getImm() != CC || 7908 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 7909 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 7910 break; 7911 LastSelectPseudo = &*SequenceMBBI; 7912 SequenceMBBI->collectDebugValues(SelectDebugValues); 7913 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 7914 } else { 7915 if (SequenceMBBI->hasUnmodeledSideEffects() || 7916 SequenceMBBI->mayLoadOrStore()) 7917 break; 7918 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 7919 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 7920 })) 7921 break; 7922 } 7923 } 7924 7925 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); 7926 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7927 DebugLoc DL = MI.getDebugLoc(); 7928 MachineFunction::iterator I = ++BB->getIterator(); 7929 7930 MachineBasicBlock *HeadMBB = BB; 7931 MachineFunction *F = BB->getParent(); 7932 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 7933 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 7934 7935 F->insert(I, IfFalseMBB); 7936 F->insert(I, TailMBB); 7937 7938 // Transfer debug instructions associated with the selects to TailMBB. 7939 for (MachineInstr *DebugInstr : SelectDebugValues) { 7940 TailMBB->push_back(DebugInstr->removeFromParent()); 7941 } 7942 7943 // Move all instructions after the sequence to TailMBB. 7944 TailMBB->splice(TailMBB->end(), HeadMBB, 7945 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 7946 // Update machine-CFG edges by transferring all successors of the current 7947 // block to the new block which will contain the Phi nodes for the selects. 7948 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 7949 // Set the successors for HeadMBB. 7950 HeadMBB->addSuccessor(IfFalseMBB); 7951 HeadMBB->addSuccessor(TailMBB); 7952 7953 // Insert appropriate branch. 7954 BuildMI(HeadMBB, DL, TII.getBrCond(CC)) 7955 .addReg(LHS) 7956 .addReg(RHS) 7957 .addMBB(TailMBB); 7958 7959 // IfFalseMBB just falls through to TailMBB. 7960 IfFalseMBB->addSuccessor(TailMBB); 7961 7962 // Create PHIs for all of the select pseudo-instructions. 7963 auto SelectMBBI = MI.getIterator(); 7964 auto SelectEnd = std::next(LastSelectPseudo->getIterator()); 7965 auto InsertionPoint = TailMBB->begin(); 7966 while (SelectMBBI != SelectEnd) { 7967 auto Next = std::next(SelectMBBI); 7968 if (isSelectPseudo(*SelectMBBI)) { 7969 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] 7970 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), 7971 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) 7972 .addReg(SelectMBBI->getOperand(4).getReg()) 7973 .addMBB(HeadMBB) 7974 .addReg(SelectMBBI->getOperand(5).getReg()) 7975 .addMBB(IfFalseMBB); 7976 SelectMBBI->eraseFromParent(); 7977 } 7978 SelectMBBI = Next; 7979 } 7980 7981 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 7982 return TailMBB; 7983 } 7984 7985 MachineBasicBlock * 7986 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 7987 MachineBasicBlock *BB) const { 7988 switch (MI.getOpcode()) { 7989 default: 7990 llvm_unreachable("Unexpected instr type to insert"); 7991 case RISCV::ReadCycleWide: 7992 assert(!Subtarget.is64Bit() && 7993 "ReadCycleWrite is only to be used on riscv32"); 7994 return emitReadCycleWidePseudo(MI, BB); 7995 case RISCV::Select_GPR_Using_CC_GPR: 7996 case RISCV::Select_FPR16_Using_CC_GPR: 7997 case RISCV::Select_FPR32_Using_CC_GPR: 7998 case RISCV::Select_FPR64_Using_CC_GPR: 7999 return emitSelectPseudo(MI, BB, Subtarget); 8000 case RISCV::BuildPairF64Pseudo: 8001 return emitBuildPairF64Pseudo(MI, BB); 8002 case RISCV::SplitF64Pseudo: 8003 return emitSplitF64Pseudo(MI, BB); 8004 } 8005 } 8006 8007 // Calling Convention Implementation. 8008 // The expectations for frontend ABI lowering vary from target to target. 8009 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI 8010 // details, but this is a longer term goal. For now, we simply try to keep the 8011 // role of the frontend as simple and well-defined as possible. The rules can 8012 // be summarised as: 8013 // * Never split up large scalar arguments. We handle them here. 8014 // * If a hardfloat calling convention is being used, and the struct may be 8015 // passed in a pair of registers (fp+fp, int+fp), and both registers are 8016 // available, then pass as two separate arguments. If either the GPRs or FPRs 8017 // are exhausted, then pass according to the rule below. 8018 // * If a struct could never be passed in registers or directly in a stack 8019 // slot (as it is larger than 2*XLEN and the floating point rules don't 8020 // apply), then pass it using a pointer with the byval attribute. 8021 // * If a struct is less than 2*XLEN, then coerce to either a two-element 8022 // word-sized array or a 2*XLEN scalar (depending on alignment). 8023 // * The frontend can determine whether a struct is returned by reference or 8024 // not based on its size and fields. If it will be returned by reference, the 8025 // frontend must modify the prototype so a pointer with the sret annotation is 8026 // passed as the first argument. This is not necessary for large scalar 8027 // returns. 8028 // * Struct return values and varargs should be coerced to structs containing 8029 // register-size fields in the same situations they would be for fixed 8030 // arguments. 8031 8032 static const MCPhysReg ArgGPRs[] = { 8033 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 8034 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 8035 }; 8036 static const MCPhysReg ArgFPR16s[] = { 8037 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, 8038 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H 8039 }; 8040 static const MCPhysReg ArgFPR32s[] = { 8041 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 8042 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 8043 }; 8044 static const MCPhysReg ArgFPR64s[] = { 8045 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 8046 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 8047 }; 8048 // This is an interim calling convention and it may be changed in the future. 8049 static const MCPhysReg ArgVRs[] = { 8050 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13, 8051 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19, 8052 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23}; 8053 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2, 8054 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2, 8055 RISCV::V20M2, RISCV::V22M2}; 8056 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, 8057 RISCV::V20M4}; 8058 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; 8059 8060 // Pass a 2*XLEN argument that has been split into two XLEN values through 8061 // registers or the stack as necessary. 8062 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 8063 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 8064 MVT ValVT2, MVT LocVT2, 8065 ISD::ArgFlagsTy ArgFlags2) { 8066 unsigned XLenInBytes = XLen / 8; 8067 if (Register Reg = State.AllocateReg(ArgGPRs)) { 8068 // At least one half can be passed via register. 8069 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 8070 VA1.getLocVT(), CCValAssign::Full)); 8071 } else { 8072 // Both halves must be passed on the stack, with proper alignment. 8073 Align StackAlign = 8074 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 8075 State.addLoc( 8076 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 8077 State.AllocateStack(XLenInBytes, StackAlign), 8078 VA1.getLocVT(), CCValAssign::Full)); 8079 State.addLoc(CCValAssign::getMem( 8080 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 8081 LocVT2, CCValAssign::Full)); 8082 return false; 8083 } 8084 8085 if (Register Reg = State.AllocateReg(ArgGPRs)) { 8086 // The second half can also be passed via register. 8087 State.addLoc( 8088 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 8089 } else { 8090 // The second half is passed via the stack, without additional alignment. 8091 State.addLoc(CCValAssign::getMem( 8092 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 8093 LocVT2, CCValAssign::Full)); 8094 } 8095 8096 return false; 8097 } 8098 8099 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, 8100 Optional<unsigned> FirstMaskArgument, 8101 CCState &State, const RISCVTargetLowering &TLI) { 8102 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT); 8103 if (RC == &RISCV::VRRegClass) { 8104 // Assign the first mask argument to V0. 8105 // This is an interim calling convention and it may be changed in the 8106 // future. 8107 if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue()) 8108 return State.AllocateReg(RISCV::V0); 8109 return State.AllocateReg(ArgVRs); 8110 } 8111 if (RC == &RISCV::VRM2RegClass) 8112 return State.AllocateReg(ArgVRM2s); 8113 if (RC == &RISCV::VRM4RegClass) 8114 return State.AllocateReg(ArgVRM4s); 8115 if (RC == &RISCV::VRM8RegClass) 8116 return State.AllocateReg(ArgVRM8s); 8117 llvm_unreachable("Unhandled register class for ValueType"); 8118 } 8119 8120 // Implements the RISC-V calling convention. Returns true upon failure. 8121 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 8122 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 8123 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 8124 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, 8125 Optional<unsigned> FirstMaskArgument) { 8126 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 8127 assert(XLen == 32 || XLen == 64); 8128 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 8129 8130 // Any return value split in to more than two values can't be returned 8131 // directly. Vectors are returned via the available vector registers. 8132 if (!LocVT.isVector() && IsRet && ValNo > 1) 8133 return true; 8134 8135 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a 8136 // variadic argument, or if no F16/F32 argument registers are available. 8137 bool UseGPRForF16_F32 = true; 8138 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 8139 // variadic argument, or if no F64 argument registers are available. 8140 bool UseGPRForF64 = true; 8141 8142 switch (ABI) { 8143 default: 8144 llvm_unreachable("Unexpected ABI"); 8145 case RISCVABI::ABI_ILP32: 8146 case RISCVABI::ABI_LP64: 8147 break; 8148 case RISCVABI::ABI_ILP32F: 8149 case RISCVABI::ABI_LP64F: 8150 UseGPRForF16_F32 = !IsFixed; 8151 break; 8152 case RISCVABI::ABI_ILP32D: 8153 case RISCVABI::ABI_LP64D: 8154 UseGPRForF16_F32 = !IsFixed; 8155 UseGPRForF64 = !IsFixed; 8156 break; 8157 } 8158 8159 // FPR16, FPR32, and FPR64 alias each other. 8160 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { 8161 UseGPRForF16_F32 = true; 8162 UseGPRForF64 = true; 8163 } 8164 8165 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and 8166 // similar local variables rather than directly checking against the target 8167 // ABI. 8168 8169 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { 8170 LocVT = XLenVT; 8171 LocInfo = CCValAssign::BCvt; 8172 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 8173 LocVT = MVT::i64; 8174 LocInfo = CCValAssign::BCvt; 8175 } 8176 8177 // If this is a variadic argument, the RISC-V calling convention requires 8178 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 8179 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 8180 // be used regardless of whether the original argument was split during 8181 // legalisation or not. The argument will not be passed by registers if the 8182 // original type is larger than 2*XLEN, so the register alignment rule does 8183 // not apply. 8184 unsigned TwoXLenInBytes = (2 * XLen) / 8; 8185 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 8186 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 8187 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 8188 // Skip 'odd' register if necessary. 8189 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 8190 State.AllocateReg(ArgGPRs); 8191 } 8192 8193 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 8194 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 8195 State.getPendingArgFlags(); 8196 8197 assert(PendingLocs.size() == PendingArgFlags.size() && 8198 "PendingLocs and PendingArgFlags out of sync"); 8199 8200 // Handle passing f64 on RV32D with a soft float ABI or when floating point 8201 // registers are exhausted. 8202 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 8203 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 8204 "Can't lower f64 if it is split"); 8205 // Depending on available argument GPRS, f64 may be passed in a pair of 8206 // GPRs, split between a GPR and the stack, or passed completely on the 8207 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 8208 // cases. 8209 Register Reg = State.AllocateReg(ArgGPRs); 8210 LocVT = MVT::i32; 8211 if (!Reg) { 8212 unsigned StackOffset = State.AllocateStack(8, Align(8)); 8213 State.addLoc( 8214 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 8215 return false; 8216 } 8217 if (!State.AllocateReg(ArgGPRs)) 8218 State.AllocateStack(4, Align(4)); 8219 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8220 return false; 8221 } 8222 8223 // Fixed-length vectors are located in the corresponding scalable-vector 8224 // container types. 8225 if (ValVT.isFixedLengthVector()) 8226 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 8227 8228 // Split arguments might be passed indirectly, so keep track of the pending 8229 // values. Split vectors are passed via a mix of registers and indirectly, so 8230 // treat them as we would any other argument. 8231 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { 8232 LocVT = XLenVT; 8233 LocInfo = CCValAssign::Indirect; 8234 PendingLocs.push_back( 8235 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 8236 PendingArgFlags.push_back(ArgFlags); 8237 if (!ArgFlags.isSplitEnd()) { 8238 return false; 8239 } 8240 } 8241 8242 // If the split argument only had two elements, it should be passed directly 8243 // in registers or on the stack. 8244 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && 8245 PendingLocs.size() <= 2) { 8246 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 8247 // Apply the normal calling convention rules to the first half of the 8248 // split argument. 8249 CCValAssign VA = PendingLocs[0]; 8250 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 8251 PendingLocs.clear(); 8252 PendingArgFlags.clear(); 8253 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 8254 ArgFlags); 8255 } 8256 8257 // Allocate to a register if possible, or else a stack slot. 8258 Register Reg; 8259 unsigned StoreSizeBytes = XLen / 8; 8260 Align StackAlign = Align(XLen / 8); 8261 8262 if (ValVT == MVT::f16 && !UseGPRForF16_F32) 8263 Reg = State.AllocateReg(ArgFPR16s); 8264 else if (ValVT == MVT::f32 && !UseGPRForF16_F32) 8265 Reg = State.AllocateReg(ArgFPR32s); 8266 else if (ValVT == MVT::f64 && !UseGPRForF64) 8267 Reg = State.AllocateReg(ArgFPR64s); 8268 else if (ValVT.isVector()) { 8269 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI); 8270 if (!Reg) { 8271 // For return values, the vector must be passed fully via registers or 8272 // via the stack. 8273 // FIXME: The proposed vector ABI only mandates v8-v15 for return values, 8274 // but we're using all of them. 8275 if (IsRet) 8276 return true; 8277 // Try using a GPR to pass the address 8278 if ((Reg = State.AllocateReg(ArgGPRs))) { 8279 LocVT = XLenVT; 8280 LocInfo = CCValAssign::Indirect; 8281 } else if (ValVT.isScalableVector()) { 8282 report_fatal_error("Unable to pass scalable vector types on the stack"); 8283 } else { 8284 // Pass fixed-length vectors on the stack. 8285 LocVT = ValVT; 8286 StoreSizeBytes = ValVT.getStoreSize(); 8287 // Align vectors to their element sizes, being careful for vXi1 8288 // vectors. 8289 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 8290 } 8291 } 8292 } else { 8293 Reg = State.AllocateReg(ArgGPRs); 8294 } 8295 8296 unsigned StackOffset = 8297 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign); 8298 8299 // If we reach this point and PendingLocs is non-empty, we must be at the 8300 // end of a split argument that must be passed indirectly. 8301 if (!PendingLocs.empty()) { 8302 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 8303 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 8304 8305 for (auto &It : PendingLocs) { 8306 if (Reg) 8307 It.convertToReg(Reg); 8308 else 8309 It.convertToMem(StackOffset); 8310 State.addLoc(It); 8311 } 8312 PendingLocs.clear(); 8313 PendingArgFlags.clear(); 8314 return false; 8315 } 8316 8317 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT || 8318 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) && 8319 "Expected an XLenVT or vector types at this stage"); 8320 8321 if (Reg) { 8322 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8323 return false; 8324 } 8325 8326 // When a floating-point value is passed on the stack, no bit-conversion is 8327 // needed. 8328 if (ValVT.isFloatingPoint()) { 8329 LocVT = ValVT; 8330 LocInfo = CCValAssign::Full; 8331 } 8332 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 8333 return false; 8334 } 8335 8336 template <typename ArgTy> 8337 static Optional<unsigned> preAssignMask(const ArgTy &Args) { 8338 for (const auto &ArgIdx : enumerate(Args)) { 8339 MVT ArgVT = ArgIdx.value().VT; 8340 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1) 8341 return ArgIdx.index(); 8342 } 8343 return None; 8344 } 8345 8346 void RISCVTargetLowering::analyzeInputArgs( 8347 MachineFunction &MF, CCState &CCInfo, 8348 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, 8349 RISCVCCAssignFn Fn) const { 8350 unsigned NumArgs = Ins.size(); 8351 FunctionType *FType = MF.getFunction().getFunctionType(); 8352 8353 Optional<unsigned> FirstMaskArgument; 8354 if (Subtarget.hasVInstructions()) 8355 FirstMaskArgument = preAssignMask(Ins); 8356 8357 for (unsigned i = 0; i != NumArgs; ++i) { 8358 MVT ArgVT = Ins[i].VT; 8359 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 8360 8361 Type *ArgTy = nullptr; 8362 if (IsRet) 8363 ArgTy = FType->getReturnType(); 8364 else if (Ins[i].isOrigArg()) 8365 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 8366 8367 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 8368 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 8369 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this, 8370 FirstMaskArgument)) { 8371 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 8372 << EVT(ArgVT).getEVTString() << '\n'); 8373 llvm_unreachable(nullptr); 8374 } 8375 } 8376 } 8377 8378 void RISCVTargetLowering::analyzeOutputArgs( 8379 MachineFunction &MF, CCState &CCInfo, 8380 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 8381 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const { 8382 unsigned NumArgs = Outs.size(); 8383 8384 Optional<unsigned> FirstMaskArgument; 8385 if (Subtarget.hasVInstructions()) 8386 FirstMaskArgument = preAssignMask(Outs); 8387 8388 for (unsigned i = 0; i != NumArgs; i++) { 8389 MVT ArgVT = Outs[i].VT; 8390 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 8391 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 8392 8393 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 8394 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 8395 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this, 8396 FirstMaskArgument)) { 8397 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 8398 << EVT(ArgVT).getEVTString() << "\n"); 8399 llvm_unreachable(nullptr); 8400 } 8401 } 8402 } 8403 8404 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 8405 // values. 8406 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 8407 const CCValAssign &VA, const SDLoc &DL, 8408 const RISCVSubtarget &Subtarget) { 8409 switch (VA.getLocInfo()) { 8410 default: 8411 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 8412 case CCValAssign::Full: 8413 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) 8414 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); 8415 break; 8416 case CCValAssign::BCvt: 8417 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 8418 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val); 8419 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 8420 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 8421 else 8422 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 8423 break; 8424 } 8425 return Val; 8426 } 8427 8428 // The caller is responsible for loading the full value if the argument is 8429 // passed with CCValAssign::Indirect. 8430 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 8431 const CCValAssign &VA, const SDLoc &DL, 8432 const RISCVTargetLowering &TLI) { 8433 MachineFunction &MF = DAG.getMachineFunction(); 8434 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 8435 EVT LocVT = VA.getLocVT(); 8436 SDValue Val; 8437 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); 8438 Register VReg = RegInfo.createVirtualRegister(RC); 8439 RegInfo.addLiveIn(VA.getLocReg(), VReg); 8440 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 8441 8442 if (VA.getLocInfo() == CCValAssign::Indirect) 8443 return Val; 8444 8445 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget()); 8446 } 8447 8448 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 8449 const CCValAssign &VA, const SDLoc &DL, 8450 const RISCVSubtarget &Subtarget) { 8451 EVT LocVT = VA.getLocVT(); 8452 8453 switch (VA.getLocInfo()) { 8454 default: 8455 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 8456 case CCValAssign::Full: 8457 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) 8458 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); 8459 break; 8460 case CCValAssign::BCvt: 8461 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16) 8462 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val); 8463 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) 8464 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 8465 else 8466 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 8467 break; 8468 } 8469 return Val; 8470 } 8471 8472 // The caller is responsible for loading the full value if the argument is 8473 // passed with CCValAssign::Indirect. 8474 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 8475 const CCValAssign &VA, const SDLoc &DL) { 8476 MachineFunction &MF = DAG.getMachineFunction(); 8477 MachineFrameInfo &MFI = MF.getFrameInfo(); 8478 EVT LocVT = VA.getLocVT(); 8479 EVT ValVT = VA.getValVT(); 8480 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 8481 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(), 8482 /*Immutable=*/true); 8483 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 8484 SDValue Val; 8485 8486 ISD::LoadExtType ExtType; 8487 switch (VA.getLocInfo()) { 8488 default: 8489 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 8490 case CCValAssign::Full: 8491 case CCValAssign::Indirect: 8492 case CCValAssign::BCvt: 8493 ExtType = ISD::NON_EXTLOAD; 8494 break; 8495 } 8496 Val = DAG.getExtLoad( 8497 ExtType, DL, LocVT, Chain, FIN, 8498 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 8499 return Val; 8500 } 8501 8502 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 8503 const CCValAssign &VA, const SDLoc &DL) { 8504 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 8505 "Unexpected VA"); 8506 MachineFunction &MF = DAG.getMachineFunction(); 8507 MachineFrameInfo &MFI = MF.getFrameInfo(); 8508 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 8509 8510 if (VA.isMemLoc()) { 8511 // f64 is passed on the stack. 8512 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true); 8513 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 8514 return DAG.getLoad(MVT::f64, DL, Chain, FIN, 8515 MachinePointerInfo::getFixedStack(MF, FI)); 8516 } 8517 8518 assert(VA.isRegLoc() && "Expected register VA assignment"); 8519 8520 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 8521 RegInfo.addLiveIn(VA.getLocReg(), LoVReg); 8522 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); 8523 SDValue Hi; 8524 if (VA.getLocReg() == RISCV::X17) { 8525 // Second half of f64 is passed on the stack. 8526 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true); 8527 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); 8528 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN, 8529 MachinePointerInfo::getFixedStack(MF, FI)); 8530 } else { 8531 // Second half of f64 is passed in another GPR. 8532 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 8533 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); 8534 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); 8535 } 8536 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 8537 } 8538 8539 // FastCC has less than 1% performance improvement for some particular 8540 // benchmark. But theoretically, it may has benenfit for some cases. 8541 static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, 8542 unsigned ValNo, MVT ValVT, MVT LocVT, 8543 CCValAssign::LocInfo LocInfo, 8544 ISD::ArgFlagsTy ArgFlags, CCState &State, 8545 bool IsFixed, bool IsRet, Type *OrigTy, 8546 const RISCVTargetLowering &TLI, 8547 Optional<unsigned> FirstMaskArgument) { 8548 8549 // X5 and X6 might be used for save-restore libcall. 8550 static const MCPhysReg GPRList[] = { 8551 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, 8552 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, 8553 RISCV::X29, RISCV::X30, RISCV::X31}; 8554 8555 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 8556 if (unsigned Reg = State.AllocateReg(GPRList)) { 8557 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8558 return false; 8559 } 8560 } 8561 8562 if (LocVT == MVT::f16) { 8563 static const MCPhysReg FPR16List[] = { 8564 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, 8565 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, 8566 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, 8567 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; 8568 if (unsigned Reg = State.AllocateReg(FPR16List)) { 8569 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8570 return false; 8571 } 8572 } 8573 8574 if (LocVT == MVT::f32) { 8575 static const MCPhysReg FPR32List[] = { 8576 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, 8577 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, 8578 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, 8579 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; 8580 if (unsigned Reg = State.AllocateReg(FPR32List)) { 8581 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8582 return false; 8583 } 8584 } 8585 8586 if (LocVT == MVT::f64) { 8587 static const MCPhysReg FPR64List[] = { 8588 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, 8589 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, 8590 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, 8591 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; 8592 if (unsigned Reg = State.AllocateReg(FPR64List)) { 8593 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8594 return false; 8595 } 8596 } 8597 8598 if (LocVT == MVT::i32 || LocVT == MVT::f32) { 8599 unsigned Offset4 = State.AllocateStack(4, Align(4)); 8600 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); 8601 return false; 8602 } 8603 8604 if (LocVT == MVT::i64 || LocVT == MVT::f64) { 8605 unsigned Offset5 = State.AllocateStack(8, Align(8)); 8606 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); 8607 return false; 8608 } 8609 8610 if (LocVT.isVector()) { 8611 if (unsigned Reg = 8612 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) { 8613 // Fixed-length vectors are located in the corresponding scalable-vector 8614 // container types. 8615 if (ValVT.isFixedLengthVector()) 8616 LocVT = TLI.getContainerForFixedLengthVector(LocVT); 8617 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8618 } else { 8619 // Try and pass the address via a "fast" GPR. 8620 if (unsigned GPRReg = State.AllocateReg(GPRList)) { 8621 LocInfo = CCValAssign::Indirect; 8622 LocVT = TLI.getSubtarget().getXLenVT(); 8623 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); 8624 } else if (ValVT.isFixedLengthVector()) { 8625 auto StackAlign = 8626 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne(); 8627 unsigned StackOffset = 8628 State.AllocateStack(ValVT.getStoreSize(), StackAlign); 8629 State.addLoc( 8630 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 8631 } else { 8632 // Can't pass scalable vectors on the stack. 8633 return true; 8634 } 8635 } 8636 8637 return false; 8638 } 8639 8640 return true; // CC didn't match. 8641 } 8642 8643 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, 8644 CCValAssign::LocInfo LocInfo, 8645 ISD::ArgFlagsTy ArgFlags, CCState &State) { 8646 8647 if (LocVT == MVT::i32 || LocVT == MVT::i64) { 8648 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim 8649 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 8650 static const MCPhysReg GPRList[] = { 8651 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22, 8652 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27}; 8653 if (unsigned Reg = State.AllocateReg(GPRList)) { 8654 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8655 return false; 8656 } 8657 } 8658 8659 if (LocVT == MVT::f32) { 8660 // Pass in STG registers: F1, ..., F6 8661 // fs0 ... fs5 8662 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F, 8663 RISCV::F18_F, RISCV::F19_F, 8664 RISCV::F20_F, RISCV::F21_F}; 8665 if (unsigned Reg = State.AllocateReg(FPR32List)) { 8666 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8667 return false; 8668 } 8669 } 8670 8671 if (LocVT == MVT::f64) { 8672 // Pass in STG registers: D1, ..., D6 8673 // fs6 ... fs11 8674 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D, 8675 RISCV::F24_D, RISCV::F25_D, 8676 RISCV::F26_D, RISCV::F27_D}; 8677 if (unsigned Reg = State.AllocateReg(FPR64List)) { 8678 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 8679 return false; 8680 } 8681 } 8682 8683 report_fatal_error("No registers left in GHC calling convention"); 8684 return true; 8685 } 8686 8687 // Transform physical registers into virtual registers. 8688 SDValue RISCVTargetLowering::LowerFormalArguments( 8689 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 8690 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 8691 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 8692 8693 MachineFunction &MF = DAG.getMachineFunction(); 8694 8695 switch (CallConv) { 8696 default: 8697 report_fatal_error("Unsupported calling convention"); 8698 case CallingConv::C: 8699 case CallingConv::Fast: 8700 break; 8701 case CallingConv::GHC: 8702 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] || 8703 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD]) 8704 report_fatal_error( 8705 "GHC calling convention requires the F and D instruction set extensions"); 8706 } 8707 8708 const Function &Func = MF.getFunction(); 8709 if (Func.hasFnAttribute("interrupt")) { 8710 if (!Func.arg_empty()) 8711 report_fatal_error( 8712 "Functions with the interrupt attribute cannot have arguments!"); 8713 8714 StringRef Kind = 8715 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 8716 8717 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine")) 8718 report_fatal_error( 8719 "Function interrupt attribute argument not supported!"); 8720 } 8721 8722 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8723 MVT XLenVT = Subtarget.getXLenVT(); 8724 unsigned XLenInBytes = Subtarget.getXLen() / 8; 8725 // Used with vargs to acumulate store chains. 8726 std::vector<SDValue> OutChains; 8727 8728 // Assign locations to all of the incoming arguments. 8729 SmallVector<CCValAssign, 16> ArgLocs; 8730 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 8731 8732 if (CallConv == CallingConv::GHC) 8733 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC); 8734 else 8735 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, 8736 CallConv == CallingConv::Fast ? CC_RISCV_FastCC 8737 : CC_RISCV); 8738 8739 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 8740 CCValAssign &VA = ArgLocs[i]; 8741 SDValue ArgValue; 8742 // Passing f64 on RV32D with a soft float ABI must be handled as a special 8743 // case. 8744 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) 8745 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL); 8746 else if (VA.isRegLoc()) 8747 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this); 8748 else 8749 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); 8750 8751 if (VA.getLocInfo() == CCValAssign::Indirect) { 8752 // If the original argument was split and passed by reference (e.g. i128 8753 // on RV32), we need to load all parts of it here (using the same 8754 // address). Vectors may be partly split to registers and partly to the 8755 // stack, in which case the base address is partly offset and subsequent 8756 // stores are relative to that. 8757 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 8758 MachinePointerInfo())); 8759 unsigned ArgIndex = Ins[i].OrigArgIndex; 8760 unsigned ArgPartOffset = Ins[i].PartOffset; 8761 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 8762 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { 8763 CCValAssign &PartVA = ArgLocs[i + 1]; 8764 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; 8765 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 8766 if (PartVA.getValVT().isScalableVector()) 8767 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 8768 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset); 8769 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 8770 MachinePointerInfo())); 8771 ++i; 8772 } 8773 continue; 8774 } 8775 InVals.push_back(ArgValue); 8776 } 8777 8778 if (IsVarArg) { 8779 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs); 8780 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); 8781 const TargetRegisterClass *RC = &RISCV::GPRRegClass; 8782 MachineFrameInfo &MFI = MF.getFrameInfo(); 8783 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 8784 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>(); 8785 8786 // Offset of the first variable argument from stack pointer, and size of 8787 // the vararg save area. For now, the varargs save area is either zero or 8788 // large enough to hold a0-a7. 8789 int VaArgOffset, VarArgsSaveSize; 8790 8791 // If all registers are allocated, then all varargs must be passed on the 8792 // stack and we don't need to save any argregs. 8793 if (ArgRegs.size() == Idx) { 8794 VaArgOffset = CCInfo.getNextStackOffset(); 8795 VarArgsSaveSize = 0; 8796 } else { 8797 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); 8798 VaArgOffset = -VarArgsSaveSize; 8799 } 8800 8801 // Record the frame index of the first variable argument 8802 // which is a value necessary to VASTART. 8803 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 8804 RVFI->setVarArgsFrameIndex(FI); 8805 8806 // If saving an odd number of registers then create an extra stack slot to 8807 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures 8808 // offsets to even-numbered registered remain 2*XLEN-aligned. 8809 if (Idx % 2) { 8810 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); 8811 VarArgsSaveSize += XLenInBytes; 8812 } 8813 8814 // Copy the integer registers that may have been used for passing varargs 8815 // to the vararg save area. 8816 for (unsigned I = Idx; I < ArgRegs.size(); 8817 ++I, VaArgOffset += XLenInBytes) { 8818 const Register Reg = RegInfo.createVirtualRegister(RC); 8819 RegInfo.addLiveIn(ArgRegs[I], Reg); 8820 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); 8821 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); 8822 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 8823 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, 8824 MachinePointerInfo::getFixedStack(MF, FI)); 8825 cast<StoreSDNode>(Store.getNode()) 8826 ->getMemOperand() 8827 ->setValue((Value *)nullptr); 8828 OutChains.push_back(Store); 8829 } 8830 RVFI->setVarArgsSaveSize(VarArgsSaveSize); 8831 } 8832 8833 // All stores are grouped in one node to allow the matching between 8834 // the size of Ins and InVals. This only happens for vararg functions. 8835 if (!OutChains.empty()) { 8836 OutChains.push_back(Chain); 8837 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); 8838 } 8839 8840 return Chain; 8841 } 8842 8843 /// isEligibleForTailCallOptimization - Check whether the call is eligible 8844 /// for tail call optimization. 8845 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 8846 bool RISCVTargetLowering::isEligibleForTailCallOptimization( 8847 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, 8848 const SmallVector<CCValAssign, 16> &ArgLocs) const { 8849 8850 auto &Callee = CLI.Callee; 8851 auto CalleeCC = CLI.CallConv; 8852 auto &Outs = CLI.Outs; 8853 auto &Caller = MF.getFunction(); 8854 auto CallerCC = Caller.getCallingConv(); 8855 8856 // Exception-handling functions need a special set of instructions to 8857 // indicate a return to the hardware. Tail-calling another function would 8858 // probably break this. 8859 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This 8860 // should be expanded as new function attributes are introduced. 8861 if (Caller.hasFnAttribute("interrupt")) 8862 return false; 8863 8864 // Do not tail call opt if the stack is used to pass parameters. 8865 if (CCInfo.getNextStackOffset() != 0) 8866 return false; 8867 8868 // Do not tail call opt if any parameters need to be passed indirectly. 8869 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are 8870 // passed indirectly. So the address of the value will be passed in a 8871 // register, or if not available, then the address is put on the stack. In 8872 // order to pass indirectly, space on the stack often needs to be allocated 8873 // in order to store the value. In this case the CCInfo.getNextStackOffset() 8874 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 8875 // are passed CCValAssign::Indirect. 8876 for (auto &VA : ArgLocs) 8877 if (VA.getLocInfo() == CCValAssign::Indirect) 8878 return false; 8879 8880 // Do not tail call opt if either caller or callee uses struct return 8881 // semantics. 8882 auto IsCallerStructRet = Caller.hasStructRetAttr(); 8883 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 8884 if (IsCallerStructRet || IsCalleeStructRet) 8885 return false; 8886 8887 // Externally-defined functions with weak linkage should not be 8888 // tail-called. The behaviour of branch instructions in this situation (as 8889 // used for tail calls) is implementation-defined, so we cannot rely on the 8890 // linker replacing the tail call with a return. 8891 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 8892 const GlobalValue *GV = G->getGlobal(); 8893 if (GV->hasExternalWeakLinkage()) 8894 return false; 8895 } 8896 8897 // The callee has to preserve all registers the caller needs to preserve. 8898 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 8899 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 8900 if (CalleeCC != CallerCC) { 8901 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 8902 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 8903 return false; 8904 } 8905 8906 // Byval parameters hand the function a pointer directly into the stack area 8907 // we want to reuse during a tail call. Working around this *is* possible 8908 // but less efficient and uglier in LowerCall. 8909 for (auto &Arg : Outs) 8910 if (Arg.Flags.isByVal()) 8911 return false; 8912 8913 return true; 8914 } 8915 8916 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { 8917 return DAG.getDataLayout().getPrefTypeAlign( 8918 VT.getTypeForEVT(*DAG.getContext())); 8919 } 8920 8921 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 8922 // and output parameter nodes. 8923 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 8924 SmallVectorImpl<SDValue> &InVals) const { 8925 SelectionDAG &DAG = CLI.DAG; 8926 SDLoc &DL = CLI.DL; 8927 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 8928 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 8929 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 8930 SDValue Chain = CLI.Chain; 8931 SDValue Callee = CLI.Callee; 8932 bool &IsTailCall = CLI.IsTailCall; 8933 CallingConv::ID CallConv = CLI.CallConv; 8934 bool IsVarArg = CLI.IsVarArg; 8935 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 8936 MVT XLenVT = Subtarget.getXLenVT(); 8937 8938 MachineFunction &MF = DAG.getMachineFunction(); 8939 8940 // Analyze the operands of the call, assigning locations to each operand. 8941 SmallVector<CCValAssign, 16> ArgLocs; 8942 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 8943 8944 if (CallConv == CallingConv::GHC) 8945 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC); 8946 else 8947 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, 8948 CallConv == CallingConv::Fast ? CC_RISCV_FastCC 8949 : CC_RISCV); 8950 8951 // Check if it's really possible to do a tail call. 8952 if (IsTailCall) 8953 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 8954 8955 if (IsTailCall) 8956 ++NumTailCalls; 8957 else if (CLI.CB && CLI.CB->isMustTailCall()) 8958 report_fatal_error("failed to perform tail call elimination on a call " 8959 "site marked musttail"); 8960 8961 // Get a count of how many bytes are to be pushed on the stack. 8962 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 8963 8964 // Create local copies for byval args 8965 SmallVector<SDValue, 8> ByValArgs; 8966 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 8967 ISD::ArgFlagsTy Flags = Outs[i].Flags; 8968 if (!Flags.isByVal()) 8969 continue; 8970 8971 SDValue Arg = OutVals[i]; 8972 unsigned Size = Flags.getByValSize(); 8973 Align Alignment = Flags.getNonZeroByValAlign(); 8974 8975 int FI = 8976 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 8977 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 8978 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 8979 8980 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 8981 /*IsVolatile=*/false, 8982 /*AlwaysInline=*/false, IsTailCall, 8983 MachinePointerInfo(), MachinePointerInfo()); 8984 ByValArgs.push_back(FIPtr); 8985 } 8986 8987 if (!IsTailCall) 8988 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 8989 8990 // Copy argument values to their designated locations. 8991 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 8992 SmallVector<SDValue, 8> MemOpChains; 8993 SDValue StackPtr; 8994 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 8995 CCValAssign &VA = ArgLocs[i]; 8996 SDValue ArgValue = OutVals[i]; 8997 ISD::ArgFlagsTy Flags = Outs[i].Flags; 8998 8999 // Handle passing f64 on RV32D with a soft float ABI as a special case. 9000 bool IsF64OnRV32DSoftABI = 9001 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 9002 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 9003 SDValue SplitF64 = DAG.getNode( 9004 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 9005 SDValue Lo = SplitF64.getValue(0); 9006 SDValue Hi = SplitF64.getValue(1); 9007 9008 Register RegLo = VA.getLocReg(); 9009 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 9010 9011 if (RegLo == RISCV::X17) { 9012 // Second half of f64 is passed on the stack. 9013 // Work out the address of the stack slot. 9014 if (!StackPtr.getNode()) 9015 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 9016 // Emit the store. 9017 MemOpChains.push_back( 9018 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 9019 } else { 9020 // Second half of f64 is passed in another GPR. 9021 assert(RegLo < RISCV::X31 && "Invalid register pair"); 9022 Register RegHigh = RegLo + 1; 9023 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 9024 } 9025 continue; 9026 } 9027 9028 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 9029 // as any other MemLoc. 9030 9031 // Promote the value if needed. 9032 // For now, only handle fully promoted and indirect arguments. 9033 if (VA.getLocInfo() == CCValAssign::Indirect) { 9034 // Store the argument in a stack slot and pass its address. 9035 Align StackAlign = 9036 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG), 9037 getPrefTypeAlign(ArgValue.getValueType(), DAG)); 9038 TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); 9039 // If the original argument was split (e.g. i128), we need 9040 // to store the required parts of it here (and pass just one address). 9041 // Vectors may be partly split to registers and partly to the stack, in 9042 // which case the base address is partly offset and subsequent stores are 9043 // relative to that. 9044 unsigned ArgIndex = Outs[i].OrigArgIndex; 9045 unsigned ArgPartOffset = Outs[i].PartOffset; 9046 assert(VA.getValVT().isVector() || ArgPartOffset == 0); 9047 // Calculate the total size to store. We don't have access to what we're 9048 // actually storing other than performing the loop and collecting the 9049 // info. 9050 SmallVector<std::pair<SDValue, SDValue>> Parts; 9051 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 9052 SDValue PartValue = OutVals[i + 1]; 9053 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; 9054 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL); 9055 EVT PartVT = PartValue.getValueType(); 9056 if (PartVT.isScalableVector()) 9057 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset); 9058 StoredSize += PartVT.getStoreSize(); 9059 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG)); 9060 Parts.push_back(std::make_pair(PartValue, Offset)); 9061 ++i; 9062 } 9063 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); 9064 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 9065 MemOpChains.push_back( 9066 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 9067 MachinePointerInfo::getFixedStack(MF, FI))); 9068 for (const auto &Part : Parts) { 9069 SDValue PartValue = Part.first; 9070 SDValue PartOffset = Part.second; 9071 SDValue Address = 9072 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset); 9073 MemOpChains.push_back( 9074 DAG.getStore(Chain, DL, PartValue, Address, 9075 MachinePointerInfo::getFixedStack(MF, FI))); 9076 } 9077 ArgValue = SpillSlot; 9078 } else { 9079 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget); 9080 } 9081 9082 // Use local copy if it is a byval arg. 9083 if (Flags.isByVal()) 9084 ArgValue = ByValArgs[j++]; 9085 9086 if (VA.isRegLoc()) { 9087 // Queue up the argument copies and emit them at the end. 9088 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 9089 } else { 9090 assert(VA.isMemLoc() && "Argument not register or memory"); 9091 assert(!IsTailCall && "Tail call not allowed if stack is used " 9092 "for passing parameters"); 9093 9094 // Work out the address of the stack slot. 9095 if (!StackPtr.getNode()) 9096 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 9097 SDValue Address = 9098 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 9099 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 9100 9101 // Emit the store. 9102 MemOpChains.push_back( 9103 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 9104 } 9105 } 9106 9107 // Join the stores, which are independent of one another. 9108 if (!MemOpChains.empty()) 9109 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 9110 9111 SDValue Glue; 9112 9113 // Build a sequence of copy-to-reg nodes, chained and glued together. 9114 for (auto &Reg : RegsToPass) { 9115 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 9116 Glue = Chain.getValue(1); 9117 } 9118 9119 // Validate that none of the argument registers have been marked as 9120 // reserved, if so report an error. Do the same for the return address if this 9121 // is not a tailcall. 9122 validateCCReservedRegs(RegsToPass, MF); 9123 if (!IsTailCall && 9124 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 9125 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 9126 MF.getFunction(), 9127 "Return address register required, but has been reserved."}); 9128 9129 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 9130 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 9131 // split it and then direct call can be matched by PseudoCALL. 9132 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 9133 const GlobalValue *GV = S->getGlobal(); 9134 9135 unsigned OpFlags = RISCVII::MO_CALL; 9136 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 9137 OpFlags = RISCVII::MO_PLT; 9138 9139 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 9140 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 9141 unsigned OpFlags = RISCVII::MO_CALL; 9142 9143 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 9144 nullptr)) 9145 OpFlags = RISCVII::MO_PLT; 9146 9147 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 9148 } 9149 9150 // The first call operand is the chain and the second is the target address. 9151 SmallVector<SDValue, 8> Ops; 9152 Ops.push_back(Chain); 9153 Ops.push_back(Callee); 9154 9155 // Add argument registers to the end of the list so that they are 9156 // known live into the call. 9157 for (auto &Reg : RegsToPass) 9158 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 9159 9160 if (!IsTailCall) { 9161 // Add a register mask operand representing the call-preserved registers. 9162 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 9163 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 9164 assert(Mask && "Missing call preserved mask for calling convention"); 9165 Ops.push_back(DAG.getRegisterMask(Mask)); 9166 } 9167 9168 // Glue the call to the argument copies, if any. 9169 if (Glue.getNode()) 9170 Ops.push_back(Glue); 9171 9172 // Emit the call. 9173 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 9174 9175 if (IsTailCall) { 9176 MF.getFrameInfo().setHasTailCall(); 9177 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 9178 } 9179 9180 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 9181 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 9182 Glue = Chain.getValue(1); 9183 9184 // Mark the end of the call, which is glued to the call itself. 9185 Chain = DAG.getCALLSEQ_END(Chain, 9186 DAG.getConstant(NumBytes, DL, PtrVT, true), 9187 DAG.getConstant(0, DL, PtrVT, true), 9188 Glue, DL); 9189 Glue = Chain.getValue(1); 9190 9191 // Assign locations to each value returned by this call. 9192 SmallVector<CCValAssign, 16> RVLocs; 9193 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 9194 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV); 9195 9196 // Copy all of the result registers out of their specified physreg. 9197 for (auto &VA : RVLocs) { 9198 // Copy the value out 9199 SDValue RetValue = 9200 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 9201 // Glue the RetValue to the end of the call sequence 9202 Chain = RetValue.getValue(1); 9203 Glue = RetValue.getValue(2); 9204 9205 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 9206 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 9207 SDValue RetValue2 = 9208 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 9209 Chain = RetValue2.getValue(1); 9210 Glue = RetValue2.getValue(2); 9211 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 9212 RetValue2); 9213 } 9214 9215 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget); 9216 9217 InVals.push_back(RetValue); 9218 } 9219 9220 return Chain; 9221 } 9222 9223 bool RISCVTargetLowering::CanLowerReturn( 9224 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 9225 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 9226 SmallVector<CCValAssign, 16> RVLocs; 9227 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 9228 9229 Optional<unsigned> FirstMaskArgument; 9230 if (Subtarget.hasVInstructions()) 9231 FirstMaskArgument = preAssignMask(Outs); 9232 9233 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 9234 MVT VT = Outs[i].VT; 9235 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 9236 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 9237 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 9238 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr, 9239 *this, FirstMaskArgument)) 9240 return false; 9241 } 9242 return true; 9243 } 9244 9245 SDValue 9246 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 9247 bool IsVarArg, 9248 const SmallVectorImpl<ISD::OutputArg> &Outs, 9249 const SmallVectorImpl<SDValue> &OutVals, 9250 const SDLoc &DL, SelectionDAG &DAG) const { 9251 const MachineFunction &MF = DAG.getMachineFunction(); 9252 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 9253 9254 // Stores the assignment of the return value to a location. 9255 SmallVector<CCValAssign, 16> RVLocs; 9256 9257 // Info about the registers and stack slot. 9258 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 9259 *DAG.getContext()); 9260 9261 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 9262 nullptr, CC_RISCV); 9263 9264 if (CallConv == CallingConv::GHC && !RVLocs.empty()) 9265 report_fatal_error("GHC functions return void only"); 9266 9267 SDValue Glue; 9268 SmallVector<SDValue, 4> RetOps(1, Chain); 9269 9270 // Copy the result values into the output registers. 9271 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 9272 SDValue Val = OutVals[i]; 9273 CCValAssign &VA = RVLocs[i]; 9274 assert(VA.isRegLoc() && "Can only return in registers!"); 9275 9276 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 9277 // Handle returning f64 on RV32D with a soft float ABI. 9278 assert(VA.isRegLoc() && "Expected return via registers"); 9279 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 9280 DAG.getVTList(MVT::i32, MVT::i32), Val); 9281 SDValue Lo = SplitF64.getValue(0); 9282 SDValue Hi = SplitF64.getValue(1); 9283 Register RegLo = VA.getLocReg(); 9284 assert(RegLo < RISCV::X31 && "Invalid register pair"); 9285 Register RegHi = RegLo + 1; 9286 9287 if (STI.isRegisterReservedByUser(RegLo) || 9288 STI.isRegisterReservedByUser(RegHi)) 9289 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 9290 MF.getFunction(), 9291 "Return value register required, but has been reserved."}); 9292 9293 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 9294 Glue = Chain.getValue(1); 9295 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 9296 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 9297 Glue = Chain.getValue(1); 9298 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 9299 } else { 9300 // Handle a 'normal' return. 9301 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget); 9302 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 9303 9304 if (STI.isRegisterReservedByUser(VA.getLocReg())) 9305 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 9306 MF.getFunction(), 9307 "Return value register required, but has been reserved."}); 9308 9309 // Guarantee that all emitted copies are stuck together. 9310 Glue = Chain.getValue(1); 9311 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 9312 } 9313 } 9314 9315 RetOps[0] = Chain; // Update chain. 9316 9317 // Add the glue node if we have it. 9318 if (Glue.getNode()) { 9319 RetOps.push_back(Glue); 9320 } 9321 9322 unsigned RetOpc = RISCVISD::RET_FLAG; 9323 // Interrupt service routines use different return instructions. 9324 const Function &Func = DAG.getMachineFunction().getFunction(); 9325 if (Func.hasFnAttribute("interrupt")) { 9326 if (!Func.getReturnType()->isVoidTy()) 9327 report_fatal_error( 9328 "Functions with the interrupt attribute must have void return type!"); 9329 9330 MachineFunction &MF = DAG.getMachineFunction(); 9331 StringRef Kind = 9332 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 9333 9334 if (Kind == "user") 9335 RetOpc = RISCVISD::URET_FLAG; 9336 else if (Kind == "supervisor") 9337 RetOpc = RISCVISD::SRET_FLAG; 9338 else 9339 RetOpc = RISCVISD::MRET_FLAG; 9340 } 9341 9342 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 9343 } 9344 9345 void RISCVTargetLowering::validateCCReservedRegs( 9346 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 9347 MachineFunction &MF) const { 9348 const Function &F = MF.getFunction(); 9349 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 9350 9351 if (llvm::any_of(Regs, [&STI](auto Reg) { 9352 return STI.isRegisterReservedByUser(Reg.first); 9353 })) 9354 F.getContext().diagnose(DiagnosticInfoUnsupported{ 9355 F, "Argument register required, but has been reserved."}); 9356 } 9357 9358 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 9359 return CI->isTailCall(); 9360 } 9361 9362 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 9363 #define NODE_NAME_CASE(NODE) \ 9364 case RISCVISD::NODE: \ 9365 return "RISCVISD::" #NODE; 9366 // clang-format off 9367 switch ((RISCVISD::NodeType)Opcode) { 9368 case RISCVISD::FIRST_NUMBER: 9369 break; 9370 NODE_NAME_CASE(RET_FLAG) 9371 NODE_NAME_CASE(URET_FLAG) 9372 NODE_NAME_CASE(SRET_FLAG) 9373 NODE_NAME_CASE(MRET_FLAG) 9374 NODE_NAME_CASE(CALL) 9375 NODE_NAME_CASE(SELECT_CC) 9376 NODE_NAME_CASE(BR_CC) 9377 NODE_NAME_CASE(BuildPairF64) 9378 NODE_NAME_CASE(SplitF64) 9379 NODE_NAME_CASE(TAIL) 9380 NODE_NAME_CASE(MULHSU) 9381 NODE_NAME_CASE(SLLW) 9382 NODE_NAME_CASE(SRAW) 9383 NODE_NAME_CASE(SRLW) 9384 NODE_NAME_CASE(DIVW) 9385 NODE_NAME_CASE(DIVUW) 9386 NODE_NAME_CASE(REMUW) 9387 NODE_NAME_CASE(ROLW) 9388 NODE_NAME_CASE(RORW) 9389 NODE_NAME_CASE(CLZW) 9390 NODE_NAME_CASE(CTZW) 9391 NODE_NAME_CASE(FSLW) 9392 NODE_NAME_CASE(FSRW) 9393 NODE_NAME_CASE(FSL) 9394 NODE_NAME_CASE(FSR) 9395 NODE_NAME_CASE(FMV_H_X) 9396 NODE_NAME_CASE(FMV_X_ANYEXTH) 9397 NODE_NAME_CASE(FMV_W_X_RV64) 9398 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) 9399 NODE_NAME_CASE(FCVT_X_RTZ) 9400 NODE_NAME_CASE(FCVT_XU_RTZ) 9401 NODE_NAME_CASE(FCVT_W_RTZ_RV64) 9402 NODE_NAME_CASE(FCVT_WU_RTZ_RV64) 9403 NODE_NAME_CASE(READ_CYCLE_WIDE) 9404 NODE_NAME_CASE(GREV) 9405 NODE_NAME_CASE(GREVW) 9406 NODE_NAME_CASE(GORC) 9407 NODE_NAME_CASE(GORCW) 9408 NODE_NAME_CASE(SHFL) 9409 NODE_NAME_CASE(SHFLW) 9410 NODE_NAME_CASE(UNSHFL) 9411 NODE_NAME_CASE(UNSHFLW) 9412 NODE_NAME_CASE(BCOMPRESS) 9413 NODE_NAME_CASE(BCOMPRESSW) 9414 NODE_NAME_CASE(BDECOMPRESS) 9415 NODE_NAME_CASE(BDECOMPRESSW) 9416 NODE_NAME_CASE(VMV_V_X_VL) 9417 NODE_NAME_CASE(VFMV_V_F_VL) 9418 NODE_NAME_CASE(VMV_X_S) 9419 NODE_NAME_CASE(VMV_S_X_VL) 9420 NODE_NAME_CASE(VFMV_S_F_VL) 9421 NODE_NAME_CASE(SPLAT_VECTOR_I64) 9422 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL) 9423 NODE_NAME_CASE(READ_VLENB) 9424 NODE_NAME_CASE(TRUNCATE_VECTOR_VL) 9425 NODE_NAME_CASE(VSLIDEUP_VL) 9426 NODE_NAME_CASE(VSLIDE1UP_VL) 9427 NODE_NAME_CASE(VSLIDEDOWN_VL) 9428 NODE_NAME_CASE(VSLIDE1DOWN_VL) 9429 NODE_NAME_CASE(VID_VL) 9430 NODE_NAME_CASE(VFNCVT_ROD_VL) 9431 NODE_NAME_CASE(VECREDUCE_ADD_VL) 9432 NODE_NAME_CASE(VECREDUCE_UMAX_VL) 9433 NODE_NAME_CASE(VECREDUCE_SMAX_VL) 9434 NODE_NAME_CASE(VECREDUCE_UMIN_VL) 9435 NODE_NAME_CASE(VECREDUCE_SMIN_VL) 9436 NODE_NAME_CASE(VECREDUCE_AND_VL) 9437 NODE_NAME_CASE(VECREDUCE_OR_VL) 9438 NODE_NAME_CASE(VECREDUCE_XOR_VL) 9439 NODE_NAME_CASE(VECREDUCE_FADD_VL) 9440 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL) 9441 NODE_NAME_CASE(VECREDUCE_FMIN_VL) 9442 NODE_NAME_CASE(VECREDUCE_FMAX_VL) 9443 NODE_NAME_CASE(ADD_VL) 9444 NODE_NAME_CASE(AND_VL) 9445 NODE_NAME_CASE(MUL_VL) 9446 NODE_NAME_CASE(OR_VL) 9447 NODE_NAME_CASE(SDIV_VL) 9448 NODE_NAME_CASE(SHL_VL) 9449 NODE_NAME_CASE(SREM_VL) 9450 NODE_NAME_CASE(SRA_VL) 9451 NODE_NAME_CASE(SRL_VL) 9452 NODE_NAME_CASE(SUB_VL) 9453 NODE_NAME_CASE(UDIV_VL) 9454 NODE_NAME_CASE(UREM_VL) 9455 NODE_NAME_CASE(XOR_VL) 9456 NODE_NAME_CASE(SADDSAT_VL) 9457 NODE_NAME_CASE(UADDSAT_VL) 9458 NODE_NAME_CASE(SSUBSAT_VL) 9459 NODE_NAME_CASE(USUBSAT_VL) 9460 NODE_NAME_CASE(FADD_VL) 9461 NODE_NAME_CASE(FSUB_VL) 9462 NODE_NAME_CASE(FMUL_VL) 9463 NODE_NAME_CASE(FDIV_VL) 9464 NODE_NAME_CASE(FNEG_VL) 9465 NODE_NAME_CASE(FABS_VL) 9466 NODE_NAME_CASE(FSQRT_VL) 9467 NODE_NAME_CASE(FMA_VL) 9468 NODE_NAME_CASE(FCOPYSIGN_VL) 9469 NODE_NAME_CASE(SMIN_VL) 9470 NODE_NAME_CASE(SMAX_VL) 9471 NODE_NAME_CASE(UMIN_VL) 9472 NODE_NAME_CASE(UMAX_VL) 9473 NODE_NAME_CASE(FMINNUM_VL) 9474 NODE_NAME_CASE(FMAXNUM_VL) 9475 NODE_NAME_CASE(MULHS_VL) 9476 NODE_NAME_CASE(MULHU_VL) 9477 NODE_NAME_CASE(FP_TO_SINT_VL) 9478 NODE_NAME_CASE(FP_TO_UINT_VL) 9479 NODE_NAME_CASE(SINT_TO_FP_VL) 9480 NODE_NAME_CASE(UINT_TO_FP_VL) 9481 NODE_NAME_CASE(FP_EXTEND_VL) 9482 NODE_NAME_CASE(FP_ROUND_VL) 9483 NODE_NAME_CASE(VWMUL_VL) 9484 NODE_NAME_CASE(VWMULU_VL) 9485 NODE_NAME_CASE(SETCC_VL) 9486 NODE_NAME_CASE(VSELECT_VL) 9487 NODE_NAME_CASE(VMAND_VL) 9488 NODE_NAME_CASE(VMOR_VL) 9489 NODE_NAME_CASE(VMXOR_VL) 9490 NODE_NAME_CASE(VMCLR_VL) 9491 NODE_NAME_CASE(VMSET_VL) 9492 NODE_NAME_CASE(VRGATHER_VX_VL) 9493 NODE_NAME_CASE(VRGATHER_VV_VL) 9494 NODE_NAME_CASE(VRGATHEREI16_VV_VL) 9495 NODE_NAME_CASE(VSEXT_VL) 9496 NODE_NAME_CASE(VZEXT_VL) 9497 NODE_NAME_CASE(VCPOP_VL) 9498 NODE_NAME_CASE(VLE_VL) 9499 NODE_NAME_CASE(VSE_VL) 9500 NODE_NAME_CASE(READ_CSR) 9501 NODE_NAME_CASE(WRITE_CSR) 9502 NODE_NAME_CASE(SWAP_CSR) 9503 } 9504 // clang-format on 9505 return nullptr; 9506 #undef NODE_NAME_CASE 9507 } 9508 9509 /// getConstraintType - Given a constraint letter, return the type of 9510 /// constraint it is for this target. 9511 RISCVTargetLowering::ConstraintType 9512 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 9513 if (Constraint.size() == 1) { 9514 switch (Constraint[0]) { 9515 default: 9516 break; 9517 case 'f': 9518 return C_RegisterClass; 9519 case 'I': 9520 case 'J': 9521 case 'K': 9522 return C_Immediate; 9523 case 'A': 9524 return C_Memory; 9525 case 'S': // A symbolic address 9526 return C_Other; 9527 } 9528 } else { 9529 if (Constraint == "vr" || Constraint == "vm") 9530 return C_RegisterClass; 9531 } 9532 return TargetLowering::getConstraintType(Constraint); 9533 } 9534 9535 std::pair<unsigned, const TargetRegisterClass *> 9536 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 9537 StringRef Constraint, 9538 MVT VT) const { 9539 // First, see if this is a constraint that directly corresponds to a 9540 // RISCV register class. 9541 if (Constraint.size() == 1) { 9542 switch (Constraint[0]) { 9543 case 'r': 9544 return std::make_pair(0U, &RISCV::GPRRegClass); 9545 case 'f': 9546 if (Subtarget.hasStdExtZfh() && VT == MVT::f16) 9547 return std::make_pair(0U, &RISCV::FPR16RegClass); 9548 if (Subtarget.hasStdExtF() && VT == MVT::f32) 9549 return std::make_pair(0U, &RISCV::FPR32RegClass); 9550 if (Subtarget.hasStdExtD() && VT == MVT::f64) 9551 return std::make_pair(0U, &RISCV::FPR64RegClass); 9552 break; 9553 default: 9554 break; 9555 } 9556 } else { 9557 if (Constraint == "vr") { 9558 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, 9559 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 9560 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) 9561 return std::make_pair(0U, RC); 9562 } 9563 } else if (Constraint == "vm") { 9564 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 9565 return std::make_pair(0U, &RISCV::VMRegClass); 9566 } 9567 } 9568 9569 // Clang will correctly decode the usage of register name aliases into their 9570 // official names. However, other frontends like `rustc` do not. This allows 9571 // users of these frontends to use the ABI names for registers in LLVM-style 9572 // register constraints. 9573 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower()) 9574 .Case("{zero}", RISCV::X0) 9575 .Case("{ra}", RISCV::X1) 9576 .Case("{sp}", RISCV::X2) 9577 .Case("{gp}", RISCV::X3) 9578 .Case("{tp}", RISCV::X4) 9579 .Case("{t0}", RISCV::X5) 9580 .Case("{t1}", RISCV::X6) 9581 .Case("{t2}", RISCV::X7) 9582 .Cases("{s0}", "{fp}", RISCV::X8) 9583 .Case("{s1}", RISCV::X9) 9584 .Case("{a0}", RISCV::X10) 9585 .Case("{a1}", RISCV::X11) 9586 .Case("{a2}", RISCV::X12) 9587 .Case("{a3}", RISCV::X13) 9588 .Case("{a4}", RISCV::X14) 9589 .Case("{a5}", RISCV::X15) 9590 .Case("{a6}", RISCV::X16) 9591 .Case("{a7}", RISCV::X17) 9592 .Case("{s2}", RISCV::X18) 9593 .Case("{s3}", RISCV::X19) 9594 .Case("{s4}", RISCV::X20) 9595 .Case("{s5}", RISCV::X21) 9596 .Case("{s6}", RISCV::X22) 9597 .Case("{s7}", RISCV::X23) 9598 .Case("{s8}", RISCV::X24) 9599 .Case("{s9}", RISCV::X25) 9600 .Case("{s10}", RISCV::X26) 9601 .Case("{s11}", RISCV::X27) 9602 .Case("{t3}", RISCV::X28) 9603 .Case("{t4}", RISCV::X29) 9604 .Case("{t5}", RISCV::X30) 9605 .Case("{t6}", RISCV::X31) 9606 .Default(RISCV::NoRegister); 9607 if (XRegFromAlias != RISCV::NoRegister) 9608 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 9609 9610 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 9611 // TableGen record rather than the AsmName to choose registers for InlineAsm 9612 // constraints, plus we want to match those names to the widest floating point 9613 // register type available, manually select floating point registers here. 9614 // 9615 // The second case is the ABI name of the register, so that frontends can also 9616 // use the ABI names in register constraint lists. 9617 if (Subtarget.hasStdExtF()) { 9618 unsigned FReg = StringSwitch<unsigned>(Constraint.lower()) 9619 .Cases("{f0}", "{ft0}", RISCV::F0_F) 9620 .Cases("{f1}", "{ft1}", RISCV::F1_F) 9621 .Cases("{f2}", "{ft2}", RISCV::F2_F) 9622 .Cases("{f3}", "{ft3}", RISCV::F3_F) 9623 .Cases("{f4}", "{ft4}", RISCV::F4_F) 9624 .Cases("{f5}", "{ft5}", RISCV::F5_F) 9625 .Cases("{f6}", "{ft6}", RISCV::F6_F) 9626 .Cases("{f7}", "{ft7}", RISCV::F7_F) 9627 .Cases("{f8}", "{fs0}", RISCV::F8_F) 9628 .Cases("{f9}", "{fs1}", RISCV::F9_F) 9629 .Cases("{f10}", "{fa0}", RISCV::F10_F) 9630 .Cases("{f11}", "{fa1}", RISCV::F11_F) 9631 .Cases("{f12}", "{fa2}", RISCV::F12_F) 9632 .Cases("{f13}", "{fa3}", RISCV::F13_F) 9633 .Cases("{f14}", "{fa4}", RISCV::F14_F) 9634 .Cases("{f15}", "{fa5}", RISCV::F15_F) 9635 .Cases("{f16}", "{fa6}", RISCV::F16_F) 9636 .Cases("{f17}", "{fa7}", RISCV::F17_F) 9637 .Cases("{f18}", "{fs2}", RISCV::F18_F) 9638 .Cases("{f19}", "{fs3}", RISCV::F19_F) 9639 .Cases("{f20}", "{fs4}", RISCV::F20_F) 9640 .Cases("{f21}", "{fs5}", RISCV::F21_F) 9641 .Cases("{f22}", "{fs6}", RISCV::F22_F) 9642 .Cases("{f23}", "{fs7}", RISCV::F23_F) 9643 .Cases("{f24}", "{fs8}", RISCV::F24_F) 9644 .Cases("{f25}", "{fs9}", RISCV::F25_F) 9645 .Cases("{f26}", "{fs10}", RISCV::F26_F) 9646 .Cases("{f27}", "{fs11}", RISCV::F27_F) 9647 .Cases("{f28}", "{ft8}", RISCV::F28_F) 9648 .Cases("{f29}", "{ft9}", RISCV::F29_F) 9649 .Cases("{f30}", "{ft10}", RISCV::F30_F) 9650 .Cases("{f31}", "{ft11}", RISCV::F31_F) 9651 .Default(RISCV::NoRegister); 9652 if (FReg != RISCV::NoRegister) { 9653 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg"); 9654 if (Subtarget.hasStdExtD()) { 9655 unsigned RegNo = FReg - RISCV::F0_F; 9656 unsigned DReg = RISCV::F0_D + RegNo; 9657 return std::make_pair(DReg, &RISCV::FPR64RegClass); 9658 } 9659 return std::make_pair(FReg, &RISCV::FPR32RegClass); 9660 } 9661 } 9662 9663 if (Subtarget.hasVInstructions()) { 9664 Register VReg = StringSwitch<Register>(Constraint.lower()) 9665 .Case("{v0}", RISCV::V0) 9666 .Case("{v1}", RISCV::V1) 9667 .Case("{v2}", RISCV::V2) 9668 .Case("{v3}", RISCV::V3) 9669 .Case("{v4}", RISCV::V4) 9670 .Case("{v5}", RISCV::V5) 9671 .Case("{v6}", RISCV::V6) 9672 .Case("{v7}", RISCV::V7) 9673 .Case("{v8}", RISCV::V8) 9674 .Case("{v9}", RISCV::V9) 9675 .Case("{v10}", RISCV::V10) 9676 .Case("{v11}", RISCV::V11) 9677 .Case("{v12}", RISCV::V12) 9678 .Case("{v13}", RISCV::V13) 9679 .Case("{v14}", RISCV::V14) 9680 .Case("{v15}", RISCV::V15) 9681 .Case("{v16}", RISCV::V16) 9682 .Case("{v17}", RISCV::V17) 9683 .Case("{v18}", RISCV::V18) 9684 .Case("{v19}", RISCV::V19) 9685 .Case("{v20}", RISCV::V20) 9686 .Case("{v21}", RISCV::V21) 9687 .Case("{v22}", RISCV::V22) 9688 .Case("{v23}", RISCV::V23) 9689 .Case("{v24}", RISCV::V24) 9690 .Case("{v25}", RISCV::V25) 9691 .Case("{v26}", RISCV::V26) 9692 .Case("{v27}", RISCV::V27) 9693 .Case("{v28}", RISCV::V28) 9694 .Case("{v29}", RISCV::V29) 9695 .Case("{v30}", RISCV::V30) 9696 .Case("{v31}", RISCV::V31) 9697 .Default(RISCV::NoRegister); 9698 if (VReg != RISCV::NoRegister) { 9699 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) 9700 return std::make_pair(VReg, &RISCV::VMRegClass); 9701 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy)) 9702 return std::make_pair(VReg, &RISCV::VRRegClass); 9703 for (const auto *RC : 9704 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { 9705 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) { 9706 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC); 9707 return std::make_pair(VReg, RC); 9708 } 9709 } 9710 } 9711 } 9712 9713 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 9714 } 9715 9716 unsigned 9717 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 9718 // Currently only support length 1 constraints. 9719 if (ConstraintCode.size() == 1) { 9720 switch (ConstraintCode[0]) { 9721 case 'A': 9722 return InlineAsm::Constraint_A; 9723 default: 9724 break; 9725 } 9726 } 9727 9728 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 9729 } 9730 9731 void RISCVTargetLowering::LowerAsmOperandForConstraint( 9732 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 9733 SelectionDAG &DAG) const { 9734 // Currently only support length 1 constraints. 9735 if (Constraint.length() == 1) { 9736 switch (Constraint[0]) { 9737 case 'I': 9738 // Validate & create a 12-bit signed immediate operand. 9739 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 9740 uint64_t CVal = C->getSExtValue(); 9741 if (isInt<12>(CVal)) 9742 Ops.push_back( 9743 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 9744 } 9745 return; 9746 case 'J': 9747 // Validate & create an integer zero operand. 9748 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 9749 if (C->getZExtValue() == 0) 9750 Ops.push_back( 9751 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 9752 return; 9753 case 'K': 9754 // Validate & create a 5-bit unsigned immediate operand. 9755 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 9756 uint64_t CVal = C->getZExtValue(); 9757 if (isUInt<5>(CVal)) 9758 Ops.push_back( 9759 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 9760 } 9761 return; 9762 case 'S': 9763 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { 9764 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), 9765 GA->getValueType(0))); 9766 } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) { 9767 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(), 9768 BA->getValueType(0))); 9769 } 9770 return; 9771 default: 9772 break; 9773 } 9774 } 9775 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 9776 } 9777 9778 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder, 9779 Instruction *Inst, 9780 AtomicOrdering Ord) const { 9781 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 9782 return Builder.CreateFence(Ord); 9783 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 9784 return Builder.CreateFence(AtomicOrdering::Release); 9785 return nullptr; 9786 } 9787 9788 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, 9789 Instruction *Inst, 9790 AtomicOrdering Ord) const { 9791 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 9792 return Builder.CreateFence(AtomicOrdering::Acquire); 9793 return nullptr; 9794 } 9795 9796 TargetLowering::AtomicExpansionKind 9797 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 9798 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 9799 // point operations can't be used in an lr/sc sequence without breaking the 9800 // forward-progress guarantee. 9801 if (AI->isFloatingPointOperation()) 9802 return AtomicExpansionKind::CmpXChg; 9803 9804 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 9805 if (Size == 8 || Size == 16) 9806 return AtomicExpansionKind::MaskedIntrinsic; 9807 return AtomicExpansionKind::None; 9808 } 9809 9810 static Intrinsic::ID 9811 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 9812 if (XLen == 32) { 9813 switch (BinOp) { 9814 default: 9815 llvm_unreachable("Unexpected AtomicRMW BinOp"); 9816 case AtomicRMWInst::Xchg: 9817 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 9818 case AtomicRMWInst::Add: 9819 return Intrinsic::riscv_masked_atomicrmw_add_i32; 9820 case AtomicRMWInst::Sub: 9821 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 9822 case AtomicRMWInst::Nand: 9823 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 9824 case AtomicRMWInst::Max: 9825 return Intrinsic::riscv_masked_atomicrmw_max_i32; 9826 case AtomicRMWInst::Min: 9827 return Intrinsic::riscv_masked_atomicrmw_min_i32; 9828 case AtomicRMWInst::UMax: 9829 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 9830 case AtomicRMWInst::UMin: 9831 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 9832 } 9833 } 9834 9835 if (XLen == 64) { 9836 switch (BinOp) { 9837 default: 9838 llvm_unreachable("Unexpected AtomicRMW BinOp"); 9839 case AtomicRMWInst::Xchg: 9840 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 9841 case AtomicRMWInst::Add: 9842 return Intrinsic::riscv_masked_atomicrmw_add_i64; 9843 case AtomicRMWInst::Sub: 9844 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 9845 case AtomicRMWInst::Nand: 9846 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 9847 case AtomicRMWInst::Max: 9848 return Intrinsic::riscv_masked_atomicrmw_max_i64; 9849 case AtomicRMWInst::Min: 9850 return Intrinsic::riscv_masked_atomicrmw_min_i64; 9851 case AtomicRMWInst::UMax: 9852 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 9853 case AtomicRMWInst::UMin: 9854 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 9855 } 9856 } 9857 9858 llvm_unreachable("Unexpected XLen\n"); 9859 } 9860 9861 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 9862 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 9863 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 9864 unsigned XLen = Subtarget.getXLen(); 9865 Value *Ordering = 9866 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 9867 Type *Tys[] = {AlignedAddr->getType()}; 9868 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 9869 AI->getModule(), 9870 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 9871 9872 if (XLen == 64) { 9873 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 9874 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 9875 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 9876 } 9877 9878 Value *Result; 9879 9880 // Must pass the shift amount needed to sign extend the loaded value prior 9881 // to performing a signed comparison for min/max. ShiftAmt is the number of 9882 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 9883 // is the number of bits to left+right shift the value in order to 9884 // sign-extend. 9885 if (AI->getOperation() == AtomicRMWInst::Min || 9886 AI->getOperation() == AtomicRMWInst::Max) { 9887 const DataLayout &DL = AI->getModule()->getDataLayout(); 9888 unsigned ValWidth = 9889 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 9890 Value *SextShamt = 9891 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 9892 Result = Builder.CreateCall(LrwOpScwLoop, 9893 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 9894 } else { 9895 Result = 9896 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 9897 } 9898 9899 if (XLen == 64) 9900 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 9901 return Result; 9902 } 9903 9904 TargetLowering::AtomicExpansionKind 9905 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 9906 AtomicCmpXchgInst *CI) const { 9907 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 9908 if (Size == 8 || Size == 16) 9909 return AtomicExpansionKind::MaskedIntrinsic; 9910 return AtomicExpansionKind::None; 9911 } 9912 9913 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 9914 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 9915 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 9916 unsigned XLen = Subtarget.getXLen(); 9917 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 9918 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 9919 if (XLen == 64) { 9920 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 9921 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 9922 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 9923 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 9924 } 9925 Type *Tys[] = {AlignedAddr->getType()}; 9926 Function *MaskedCmpXchg = 9927 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 9928 Value *Result = Builder.CreateCall( 9929 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 9930 if (XLen == 64) 9931 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 9932 return Result; 9933 } 9934 9935 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { 9936 return false; 9937 } 9938 9939 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, 9940 EVT VT) const { 9941 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) 9942 return false; 9943 9944 switch (FPVT.getSimpleVT().SimpleTy) { 9945 case MVT::f16: 9946 return Subtarget.hasStdExtZfh(); 9947 case MVT::f32: 9948 return Subtarget.hasStdExtF(); 9949 case MVT::f64: 9950 return Subtarget.hasStdExtD(); 9951 default: 9952 return false; 9953 } 9954 } 9955 9956 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 9957 EVT VT) const { 9958 VT = VT.getScalarType(); 9959 9960 if (!VT.isSimple()) 9961 return false; 9962 9963 switch (VT.getSimpleVT().SimpleTy) { 9964 case MVT::f16: 9965 return Subtarget.hasStdExtZfh(); 9966 case MVT::f32: 9967 return Subtarget.hasStdExtF(); 9968 case MVT::f64: 9969 return Subtarget.hasStdExtD(); 9970 default: 9971 break; 9972 } 9973 9974 return false; 9975 } 9976 9977 Register RISCVTargetLowering::getExceptionPointerRegister( 9978 const Constant *PersonalityFn) const { 9979 return RISCV::X10; 9980 } 9981 9982 Register RISCVTargetLowering::getExceptionSelectorRegister( 9983 const Constant *PersonalityFn) const { 9984 return RISCV::X11; 9985 } 9986 9987 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 9988 // Return false to suppress the unnecessary extensions if the LibCall 9989 // arguments or return value is f32 type for LP64 ABI. 9990 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 9991 if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) 9992 return false; 9993 9994 return true; 9995 } 9996 9997 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { 9998 if (Subtarget.is64Bit() && Type == MVT::i32) 9999 return true; 10000 10001 return IsSigned; 10002 } 10003 10004 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 10005 SDValue C) const { 10006 // Check integral scalar types. 10007 if (VT.isScalarInteger()) { 10008 // Omit the optimization if the sub target has the M extension and the data 10009 // size exceeds XLen. 10010 if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) 10011 return false; 10012 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 10013 // Break the MUL to a SLLI and an ADD/SUB. 10014 const APInt &Imm = ConstNode->getAPIntValue(); 10015 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || 10016 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) 10017 return true; 10018 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12. 10019 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) && 10020 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || 10021 (Imm - 8).isPowerOf2())) 10022 return true; 10023 // Omit the following optimization if the sub target has the M extension 10024 // and the data size >= XLen. 10025 if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) 10026 return false; 10027 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs 10028 // a pair of LUI/ADDI. 10029 if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { 10030 APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); 10031 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || 10032 (1 - ImmS).isPowerOf2()) 10033 return true; 10034 } 10035 } 10036 } 10037 10038 return false; 10039 } 10040 10041 bool RISCVTargetLowering::isMulAddWithConstProfitable( 10042 const SDValue &AddNode, const SDValue &ConstNode) const { 10043 // Let the DAGCombiner decide for vectors. 10044 EVT VT = AddNode.getValueType(); 10045 if (VT.isVector()) 10046 return true; 10047 10048 // Let the DAGCombiner decide for larger types. 10049 if (VT.getScalarSizeInBits() > Subtarget.getXLen()) 10050 return true; 10051 10052 // It is worse if c1 is simm12 while c1*c2 is not. 10053 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1)); 10054 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode); 10055 const APInt &C1 = C1Node->getAPIntValue(); 10056 const APInt &C2 = C2Node->getAPIntValue(); 10057 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12)) 10058 return false; 10059 10060 // Default to true and let the DAGCombiner decide. 10061 return true; 10062 } 10063 10064 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( 10065 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, 10066 bool *Fast) const { 10067 if (!VT.isVector()) 10068 return false; 10069 10070 EVT ElemVT = VT.getVectorElementType(); 10071 if (Alignment >= ElemVT.getStoreSize()) { 10072 if (Fast) 10073 *Fast = true; 10074 return true; 10075 } 10076 10077 return false; 10078 } 10079 10080 bool RISCVTargetLowering::splitValueIntoRegisterParts( 10081 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 10082 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { 10083 bool IsABIRegCopy = CC.hasValue(); 10084 EVT ValueVT = Val.getValueType(); 10085 if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { 10086 // Cast the f16 to i16, extend to i32, pad with ones to make a float nan, 10087 // and cast to f32. 10088 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); 10089 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); 10090 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, 10091 DAG.getConstant(0xFFFF0000, DL, MVT::i32)); 10092 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); 10093 Parts[0] = Val; 10094 return true; 10095 } 10096 10097 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 10098 LLVMContext &Context = *DAG.getContext(); 10099 EVT ValueEltVT = ValueVT.getVectorElementType(); 10100 EVT PartEltVT = PartVT.getVectorElementType(); 10101 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize(); 10102 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize(); 10103 if (PartVTBitSize % ValueVTBitSize == 0) { 10104 // If the element types are different, bitcast to the same element type of 10105 // PartVT first. 10106 if (ValueEltVT != PartEltVT) { 10107 unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits(); 10108 assert(Count != 0 && "The number of element should not be zero."); 10109 EVT SameEltTypeVT = 10110 EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true); 10111 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val); 10112 } 10113 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), 10114 Val, DAG.getConstant(0, DL, Subtarget.getXLenVT())); 10115 Parts[0] = Val; 10116 return true; 10117 } 10118 } 10119 return false; 10120 } 10121 10122 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( 10123 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 10124 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const { 10125 bool IsABIRegCopy = CC.hasValue(); 10126 if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { 10127 SDValue Val = Parts[0]; 10128 10129 // Cast the f32 to i32, truncate to i16, and cast back to f16. 10130 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); 10131 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val); 10132 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val); 10133 return Val; 10134 } 10135 10136 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { 10137 LLVMContext &Context = *DAG.getContext(); 10138 SDValue Val = Parts[0]; 10139 EVT ValueEltVT = ValueVT.getVectorElementType(); 10140 EVT PartEltVT = PartVT.getVectorElementType(); 10141 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize(); 10142 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize(); 10143 if (PartVTBitSize % ValueVTBitSize == 0) { 10144 EVT SameEltTypeVT = ValueVT; 10145 // If the element types are different, convert it to the same element type 10146 // of PartVT. 10147 if (ValueEltVT != PartEltVT) { 10148 unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits(); 10149 assert(Count != 0 && "The number of element should not be zero."); 10150 SameEltTypeVT = 10151 EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true); 10152 } 10153 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val, 10154 DAG.getConstant(0, DL, Subtarget.getXLenVT())); 10155 if (ValueEltVT != PartEltVT) 10156 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 10157 return Val; 10158 } 10159 } 10160 return SDValue(); 10161 } 10162 10163 #define GET_REGISTER_MATCHER 10164 #include "RISCVGenAsmMatcher.inc" 10165 10166 Register 10167 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 10168 const MachineFunction &MF) const { 10169 Register Reg = MatchRegisterAltName(RegName); 10170 if (Reg == RISCV::NoRegister) 10171 Reg = MatchRegisterName(RegName); 10172 if (Reg == RISCV::NoRegister) 10173 report_fatal_error( 10174 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 10175 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 10176 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 10177 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 10178 StringRef(RegName) + "\".")); 10179 return Reg; 10180 } 10181 10182 namespace llvm { 10183 namespace RISCVVIntrinsicsTable { 10184 10185 #define GET_RISCVVIntrinsicsTable_IMPL 10186 #include "RISCVGenSearchableTables.inc" 10187 10188 } // namespace RISCVVIntrinsicsTable 10189 10190 } // namespace llvm 10191