1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that RISCV uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "RISCVISelLowering.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVRegisterInfo.h" 18 #include "RISCVSubtarget.h" 19 #include "RISCVTargetMachine.h" 20 #include "Utils/RISCVMatInt.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/CodeGen/CallingConvLower.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunction.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/SelectionDAGISel.h" 29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 30 #include "llvm/CodeGen/ValueTypes.h" 31 #include "llvm/IR/DiagnosticInfo.h" 32 #include "llvm/IR/DiagnosticPrinter.h" 33 #include "llvm/IR/IntrinsicsRISCV.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/raw_ostream.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "riscv-lower" 42 43 STATISTIC(NumTailCalls, "Number of tail calls"); 44 45 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, 46 const RISCVSubtarget &STI) 47 : TargetLowering(TM), Subtarget(STI) { 48 49 if (Subtarget.isRV32E()) 50 report_fatal_error("Codegen not yet implemented for RV32E"); 51 52 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 53 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); 54 55 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) && 56 !Subtarget.hasStdExtF()) { 57 errs() << "Hard-float 'f' ABI can't be used for a target that " 58 "doesn't support the F instruction set extension (ignoring " 59 "target-abi)\n"; 60 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 61 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) && 62 !Subtarget.hasStdExtD()) { 63 errs() << "Hard-float 'd' ABI can't be used for a target that " 64 "doesn't support the D instruction set extension (ignoring " 65 "target-abi)\n"; 66 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32; 67 } 68 69 switch (ABI) { 70 default: 71 report_fatal_error("Don't know how to lower this ABI"); 72 case RISCVABI::ABI_ILP32: 73 case RISCVABI::ABI_ILP32F: 74 case RISCVABI::ABI_ILP32D: 75 case RISCVABI::ABI_LP64: 76 case RISCVABI::ABI_LP64F: 77 case RISCVABI::ABI_LP64D: 78 break; 79 } 80 81 MVT XLenVT = Subtarget.getXLenVT(); 82 83 // Set up the register classes. 84 addRegisterClass(XLenVT, &RISCV::GPRRegClass); 85 86 if (Subtarget.hasStdExtF()) 87 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); 88 if (Subtarget.hasStdExtD()) 89 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass); 90 91 // Compute derived properties from the register classes. 
92 computeRegisterProperties(STI.getRegisterInfo()); 93 94 setStackPointerRegisterToSaveRestore(RISCV::X2); 95 96 for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) 97 setLoadExtAction(N, XLenVT, MVT::i1, Promote); 98 99 // TODO: add all necessary setOperationAction calls. 100 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); 101 102 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 103 setOperationAction(ISD::BR_CC, XLenVT, Expand); 104 setOperationAction(ISD::SELECT, XLenVT, Custom); 105 setOperationAction(ISD::SELECT_CC, XLenVT, Expand); 106 107 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 108 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 109 110 setOperationAction(ISD::VASTART, MVT::Other, Custom); 111 setOperationAction(ISD::VAARG, MVT::Other, Expand); 112 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 113 setOperationAction(ISD::VAEND, MVT::Other, Expand); 114 115 for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) 116 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); 117 118 if (Subtarget.is64Bit()) { 119 setOperationAction(ISD::ADD, MVT::i32, Custom); 120 setOperationAction(ISD::SUB, MVT::i32, Custom); 121 setOperationAction(ISD::SHL, MVT::i32, Custom); 122 setOperationAction(ISD::SRA, MVT::i32, Custom); 123 setOperationAction(ISD::SRL, MVT::i32, Custom); 124 } 125 126 if (!Subtarget.hasStdExtM()) { 127 setOperationAction(ISD::MUL, XLenVT, Expand); 128 setOperationAction(ISD::MULHS, XLenVT, Expand); 129 setOperationAction(ISD::MULHU, XLenVT, Expand); 130 setOperationAction(ISD::SDIV, XLenVT, Expand); 131 setOperationAction(ISD::UDIV, XLenVT, Expand); 132 setOperationAction(ISD::SREM, XLenVT, Expand); 133 setOperationAction(ISD::UREM, XLenVT, Expand); 134 } 135 136 if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { 137 setOperationAction(ISD::MUL, MVT::i32, Custom); 138 setOperationAction(ISD::SDIV, MVT::i32, Custom); 139 setOperationAction(ISD::UDIV, MVT::i32, Custom); 140 setOperationAction(ISD::UREM, MVT::i32, Custom); 141 } 142 143 setOperationAction(ISD::SDIVREM, XLenVT, Expand); 144 setOperationAction(ISD::UDIVREM, XLenVT, Expand); 145 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); 146 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); 147 148 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); 149 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); 150 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); 151 152 setOperationAction(ISD::ROTL, XLenVT, Expand); 153 setOperationAction(ISD::ROTR, XLenVT, Expand); 154 setOperationAction(ISD::BSWAP, XLenVT, Expand); 155 setOperationAction(ISD::CTTZ, XLenVT, Expand); 156 setOperationAction(ISD::CTLZ, XLenVT, Expand); 157 setOperationAction(ISD::CTPOP, XLenVT, Expand); 158 159 ISD::CondCode FPCCToExtend[] = { 160 ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, 161 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, 162 ISD::SETGE, ISD::SETNE}; 163 164 ISD::NodeType FPOpToExtend[] = { 165 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, 166 ISD::FP_TO_FP16}; 167 168 if (Subtarget.hasStdExtF()) { 169 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 170 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 171 for (auto CC : FPCCToExtend) 172 setCondCodeAction(CC, MVT::f32, Expand); 173 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 174 setOperationAction(ISD::SELECT, MVT::f32, Custom); 175 setOperationAction(ISD::BR_CC, MVT::f32, Expand); 176 for (auto Op : FPOpToExtend) 177 setOperationAction(Op, MVT::f32, Expand); 178 
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 179 setTruncStoreAction(MVT::f32, MVT::f16, Expand); 180 } 181 182 if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) 183 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 184 185 if (Subtarget.hasStdExtD()) { 186 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 187 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 188 for (auto CC : FPCCToExtend) 189 setCondCodeAction(CC, MVT::f64, Expand); 190 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 191 setOperationAction(ISD::SELECT, MVT::f64, Custom); 192 setOperationAction(ISD::BR_CC, MVT::f64, Expand); 193 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 194 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 195 for (auto Op : FPOpToExtend) 196 setOperationAction(Op, MVT::f64, Expand); 197 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 198 setTruncStoreAction(MVT::f64, MVT::f16, Expand); 199 } 200 201 if (Subtarget.is64Bit() && 202 !(Subtarget.hasStdExtD() || Subtarget.hasStdExtF())) { 203 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 204 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 205 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); 206 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); 207 } 208 209 setOperationAction(ISD::GlobalAddress, XLenVT, Custom); 210 setOperationAction(ISD::BlockAddress, XLenVT, Custom); 211 setOperationAction(ISD::ConstantPool, XLenVT, Custom); 212 213 setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); 214 215 // TODO: On M-mode only targets, the cycle[h] CSR may not be present. 216 // Unfortunately this can't be determined just from the ISA naming string. 217 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, 218 Subtarget.is64Bit() ? Legal : Custom); 219 220 setOperationAction(ISD::TRAP, MVT::Other, Legal); 221 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 222 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 223 224 if (Subtarget.hasStdExtA()) { 225 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); 226 setMinCmpXchgSizeInBits(32); 227 } else { 228 setMaxAtomicSizeInBitsSupported(0); 229 } 230 231 setBooleanContents(ZeroOrOneBooleanContent); 232 233 // Function alignments. 234 const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4); 235 setMinFunctionAlignment(FunctionAlignment); 236 setPrefFunctionAlignment(FunctionAlignment); 237 238 // Effectively disable jump table generation. 
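  // Requiring at least INT_MAX entries before a jump table is considered
  // profitable means switches are always lowered to compare-and-branch
  // sequences instead.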
239 setMinimumJumpTableEntries(INT_MAX); 240 241 // Jumps are expensive, compared to logic 242 setJumpIsExpensive(); 243 244 // We can use any register for comparisons 245 setHasMultipleConditionRegisters(); 246 } 247 248 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, 249 EVT VT) const { 250 if (!VT.isVector()) 251 return getPointerTy(DL); 252 return VT.changeVectorElementTypeToInteger(); 253 } 254 255 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 256 const CallInst &I, 257 MachineFunction &MF, 258 unsigned Intrinsic) const { 259 switch (Intrinsic) { 260 default: 261 return false; 262 case Intrinsic::riscv_masked_atomicrmw_xchg_i32: 263 case Intrinsic::riscv_masked_atomicrmw_add_i32: 264 case Intrinsic::riscv_masked_atomicrmw_sub_i32: 265 case Intrinsic::riscv_masked_atomicrmw_nand_i32: 266 case Intrinsic::riscv_masked_atomicrmw_max_i32: 267 case Intrinsic::riscv_masked_atomicrmw_min_i32: 268 case Intrinsic::riscv_masked_atomicrmw_umax_i32: 269 case Intrinsic::riscv_masked_atomicrmw_umin_i32: 270 case Intrinsic::riscv_masked_cmpxchg_i32: 271 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); 272 Info.opc = ISD::INTRINSIC_W_CHAIN; 273 Info.memVT = MVT::getVT(PtrTy->getElementType()); 274 Info.ptrVal = I.getArgOperand(0); 275 Info.offset = 0; 276 Info.align = Align(4); 277 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | 278 MachineMemOperand::MOVolatile; 279 return true; 280 } 281 } 282 283 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL, 284 const AddrMode &AM, Type *Ty, 285 unsigned AS, 286 Instruction *I) const { 287 // No global is ever allowed as a base. 288 if (AM.BaseGV) 289 return false; 290 291 // Require a 12-bit signed offset. 292 if (!isInt<12>(AM.BaseOffs)) 293 return false; 294 295 switch (AM.Scale) { 296 case 0: // "r+i" or just "i", depending on HasBaseReg. 297 break; 298 case 1: 299 if (!AM.HasBaseReg) // allow "r+i". 300 break; 301 return false; // disallow "r+r" or "r+r+i". 302 default: 303 return false; 304 } 305 306 return true; 307 } 308 309 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 310 return isInt<12>(Imm); 311 } 312 313 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const { 314 return isInt<12>(Imm); 315 } 316 317 // On RV32, 64-bit integers are split into their high and low parts and held 318 // in two different registers, so the trunc is free since the low register can 319 // just be used. 320 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { 321 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 322 return false; 323 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); 324 unsigned DestBits = DstTy->getPrimitiveSizeInBits(); 325 return (SrcBits == 64 && DestBits == 32); 326 } 327 328 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { 329 if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || 330 !SrcVT.isInteger() || !DstVT.isInteger()) 331 return false; 332 unsigned SrcBits = SrcVT.getSizeInBits(); 333 unsigned DestBits = DstVT.getSizeInBits(); 334 return (SrcBits == 64 && DestBits == 32); 335 } 336 337 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 338 // Zexts are free if they can be combined with a load. 
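  // Zero-extending loads (LBU, LHU and, on RV64, LWU) perform the extension as
  // part of the load itself, so no separate zero-extension instruction is
  // needed.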
339 if (auto *LD = dyn_cast<LoadSDNode>(Val)) { 340 EVT MemVT = LD->getMemoryVT(); 341 if ((MemVT == MVT::i8 || MemVT == MVT::i16 || 342 (Subtarget.is64Bit() && MemVT == MVT::i32)) && 343 (LD->getExtensionType() == ISD::NON_EXTLOAD || 344 LD->getExtensionType() == ISD::ZEXTLOAD)) 345 return true; 346 } 347 348 return TargetLowering::isZExtFree(Val, VT2); 349 } 350 351 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { 352 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; 353 } 354 355 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 356 bool ForCodeSize) const { 357 if (VT == MVT::f32 && !Subtarget.hasStdExtF()) 358 return false; 359 if (VT == MVT::f64 && !Subtarget.hasStdExtD()) 360 return false; 361 if (Imm.isNegZero()) 362 return false; 363 return Imm.isZero(); 364 } 365 366 bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { 367 return (VT == MVT::f32 && Subtarget.hasStdExtF()) || 368 (VT == MVT::f64 && Subtarget.hasStdExtD()); 369 } 370 371 // Changes the condition code and swaps operands if necessary, so the SetCC 372 // operation matches one of the comparisons supported directly in the RISC-V 373 // ISA. 374 static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { 375 switch (CC) { 376 default: 377 break; 378 case ISD::SETGT: 379 case ISD::SETLE: 380 case ISD::SETUGT: 381 case ISD::SETULE: 382 CC = ISD::getSetCCSwappedOperands(CC); 383 std::swap(LHS, RHS); 384 break; 385 } 386 } 387 388 // Return the RISC-V branch opcode that matches the given DAG integer 389 // condition code. The CondCode must be one of those supported by the RISC-V 390 // ISA (see normaliseSetCC). 391 static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) { 392 switch (CC) { 393 default: 394 llvm_unreachable("Unsupported CondCode"); 395 case ISD::SETEQ: 396 return RISCV::BEQ; 397 case ISD::SETNE: 398 return RISCV::BNE; 399 case ISD::SETLT: 400 return RISCV::BLT; 401 case ISD::SETGE: 402 return RISCV::BGE; 403 case ISD::SETULT: 404 return RISCV::BLTU; 405 case ISD::SETUGE: 406 return RISCV::BGEU; 407 } 408 } 409 410 SDValue RISCVTargetLowering::LowerOperation(SDValue Op, 411 SelectionDAG &DAG) const { 412 switch (Op.getOpcode()) { 413 default: 414 report_fatal_error("unimplemented operand"); 415 case ISD::GlobalAddress: 416 return lowerGlobalAddress(Op, DAG); 417 case ISD::BlockAddress: 418 return lowerBlockAddress(Op, DAG); 419 case ISD::ConstantPool: 420 return lowerConstantPool(Op, DAG); 421 case ISD::GlobalTLSAddress: 422 return lowerGlobalTLSAddress(Op, DAG); 423 case ISD::SELECT: 424 return lowerSELECT(Op, DAG); 425 case ISD::VASTART: 426 return lowerVASTART(Op, DAG); 427 case ISD::FRAMEADDR: 428 return lowerFRAMEADDR(Op, DAG); 429 case ISD::RETURNADDR: 430 return lowerRETURNADDR(Op, DAG); 431 case ISD::SHL_PARTS: 432 return lowerShiftLeftParts(Op, DAG); 433 case ISD::SRA_PARTS: 434 return lowerShiftRightParts(Op, DAG, true); 435 case ISD::SRL_PARTS: 436 return lowerShiftRightParts(Op, DAG, false); 437 case ISD::BITCAST: { 438 assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() && 439 "Unexpected custom legalisation"); 440 SDLoc DL(Op); 441 SDValue Op0 = Op.getOperand(0); 442 if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32) 443 return SDValue(); 444 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 445 SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); 446 return FPConv; 447 } 448 case ISD::INTRINSIC_WO_CHAIN: 449 return 
LowerINTRINSIC_WO_CHAIN(Op, DAG); 450 } 451 } 452 453 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, 454 SelectionDAG &DAG, unsigned Flags) { 455 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); 456 } 457 458 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, 459 SelectionDAG &DAG, unsigned Flags) { 460 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), 461 Flags); 462 } 463 464 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, 465 SelectionDAG &DAG, unsigned Flags) { 466 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), 467 N->getOffset(), Flags); 468 } 469 470 template <class NodeTy> 471 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, 472 bool IsLocal) const { 473 SDLoc DL(N); 474 EVT Ty = getPointerTy(DAG.getDataLayout()); 475 476 if (isPositionIndependent()) { 477 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 478 if (IsLocal) 479 // Use PC-relative addressing to access the symbol. This generates the 480 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) 481 // %pcrel_lo(auipc)). 482 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 483 484 // Use PC-relative addressing to access the GOT for this symbol, then load 485 // the address from the GOT. This generates the pattern (PseudoLA sym), 486 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). 487 return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); 488 } 489 490 switch (getTargetMachine().getCodeModel()) { 491 default: 492 report_fatal_error("Unsupported code model for lowering"); 493 case CodeModel::Small: { 494 // Generate a sequence for accessing addresses within the first 2 GiB of 495 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 496 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); 497 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); 498 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 499 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); 500 } 501 case CodeModel::Medium: { 502 // Generate a sequence for accessing addresses within any 2GiB range within 503 // the address space. This generates the pattern (PseudoLLA sym), which 504 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). 505 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); 506 return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); 507 } 508 } 509 } 510 511 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, 512 SelectionDAG &DAG) const { 513 SDLoc DL(Op); 514 EVT Ty = Op.getValueType(); 515 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 516 int64_t Offset = N->getOffset(); 517 MVT XLenVT = Subtarget.getXLenVT(); 518 519 const GlobalValue *GV = N->getGlobal(); 520 bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); 521 SDValue Addr = getAddr(N, DAG, IsLocal); 522 523 // In order to maximise the opportunity for common subexpression elimination, 524 // emit a separate ADD node for the global address offset instead of folding 525 // it in the global address node. Later peephole optimisations may choose to 526 // fold it back in when profitable. 
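  // For example, accesses to both g and g+8 can then share a single
  // materialisation of g's address, with the constant offset applied by a
  // separate add.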
527 if (Offset != 0) 528 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 529 DAG.getConstant(Offset, DL, XLenVT)); 530 return Addr; 531 } 532 533 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, 534 SelectionDAG &DAG) const { 535 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); 536 537 return getAddr(N, DAG); 538 } 539 540 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, 541 SelectionDAG &DAG) const { 542 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); 543 544 return getAddr(N, DAG); 545 } 546 547 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, 548 SelectionDAG &DAG, 549 bool UseGOT) const { 550 SDLoc DL(N); 551 EVT Ty = getPointerTy(DAG.getDataLayout()); 552 const GlobalValue *GV = N->getGlobal(); 553 MVT XLenVT = Subtarget.getXLenVT(); 554 555 if (UseGOT) { 556 // Use PC-relative addressing to access the GOT for this TLS symbol, then 557 // load the address from the GOT and add the thread pointer. This generates 558 // the pattern (PseudoLA_TLS_IE sym), which expands to 559 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). 560 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 561 SDValue Load = 562 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); 563 564 // Add the thread pointer. 565 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 566 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); 567 } 568 569 // Generate a sequence for accessing the address relative to the thread 570 // pointer, with the appropriate adjustment for the thread pointer offset. 571 // This generates the pattern 572 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) 573 SDValue AddrHi = 574 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); 575 SDValue AddrAdd = 576 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); 577 SDValue AddrLo = 578 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); 579 580 SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); 581 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); 582 SDValue MNAdd = SDValue( 583 DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), 584 0); 585 return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); 586 } 587 588 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, 589 SelectionDAG &DAG) const { 590 SDLoc DL(N); 591 EVT Ty = getPointerTy(DAG.getDataLayout()); 592 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); 593 const GlobalValue *GV = N->getGlobal(); 594 595 // Use a PC-relative addressing mode to access the global dynamic GOT address. 596 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to 597 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 598 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); 599 SDValue Load = 600 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); 601 602 // Prepare argument list to generate call. 603 ArgListTy Args; 604 ArgListEntry Entry; 605 Entry.Node = Load; 606 Entry.Ty = CallTy; 607 Args.push_back(Entry); 608 609 // Setup call to __tls_get_addr. 
610 TargetLowering::CallLoweringInfo CLI(DAG); 611 CLI.setDebugLoc(DL) 612 .setChain(DAG.getEntryNode()) 613 .setLibCallee(CallingConv::C, CallTy, 614 DAG.getExternalSymbol("__tls_get_addr", Ty), 615 std::move(Args)); 616 617 return LowerCallTo(CLI).first; 618 } 619 620 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, 621 SelectionDAG &DAG) const { 622 SDLoc DL(Op); 623 EVT Ty = Op.getValueType(); 624 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); 625 int64_t Offset = N->getOffset(); 626 MVT XLenVT = Subtarget.getXLenVT(); 627 628 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); 629 630 SDValue Addr; 631 switch (Model) { 632 case TLSModel::LocalExec: 633 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); 634 break; 635 case TLSModel::InitialExec: 636 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); 637 break; 638 case TLSModel::LocalDynamic: 639 case TLSModel::GeneralDynamic: 640 Addr = getDynamicTLSAddr(N, DAG); 641 break; 642 } 643 644 // In order to maximise the opportunity for common subexpression elimination, 645 // emit a separate ADD node for the global address offset instead of folding 646 // it in the global address node. Later peephole optimisations may choose to 647 // fold it back in when profitable. 648 if (Offset != 0) 649 return DAG.getNode(ISD::ADD, DL, Ty, Addr, 650 DAG.getConstant(Offset, DL, XLenVT)); 651 return Addr; 652 } 653 654 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 655 SDValue CondV = Op.getOperand(0); 656 SDValue TrueV = Op.getOperand(1); 657 SDValue FalseV = Op.getOperand(2); 658 SDLoc DL(Op); 659 MVT XLenVT = Subtarget.getXLenVT(); 660 661 // If the result type is XLenVT and CondV is the output of a SETCC node 662 // which also operated on XLenVT inputs, then merge the SETCC node into the 663 // lowered RISCVISD::SELECT_CC to take advantage of the integer 664 // compare+branch instructions. i.e.: 665 // (select (setcc lhs, rhs, cc), truev, falsev) 666 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev) 667 if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC && 668 CondV.getOperand(0).getSimpleValueType() == XLenVT) { 669 SDValue LHS = CondV.getOperand(0); 670 SDValue RHS = CondV.getOperand(1); 671 auto CC = cast<CondCodeSDNode>(CondV.getOperand(2)); 672 ISD::CondCode CCVal = CC->get(); 673 674 normaliseSetCC(LHS, RHS, CCVal); 675 676 SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT); 677 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 678 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; 679 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 680 } 681 682 // Otherwise: 683 // (select condv, truev, falsev) 684 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev) 685 SDValue Zero = DAG.getConstant(0, DL, XLenVT); 686 SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT); 687 688 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); 689 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV}; 690 691 return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); 692 } 693 694 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { 695 MachineFunction &MF = DAG.getMachineFunction(); 696 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 697 698 SDLoc DL(Op); 699 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 700 getPointerTy(MF.getDataLayout())); 701 702 // vastart just stores the address of the VarArgsFrameIndex slot into the 703 // memory location argument. 
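  // The frame index is expected to have been created when the formal
  // arguments were lowered, pointing at the start of the variadic argument
  // save area.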
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
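  // On RISC-V the return address register is X1 (ra).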
758 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); 759 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); 760 } 761 762 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, 763 SelectionDAG &DAG) const { 764 SDLoc DL(Op); 765 SDValue Lo = Op.getOperand(0); 766 SDValue Hi = Op.getOperand(1); 767 SDValue Shamt = Op.getOperand(2); 768 EVT VT = Lo.getValueType(); 769 770 // if Shamt-XLEN < 0: // Shamt < XLEN 771 // Lo = Lo << Shamt 772 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) 773 // else: 774 // Lo = 0 775 // Hi = Lo << (Shamt-XLEN) 776 777 SDValue Zero = DAG.getConstant(0, DL, VT); 778 SDValue One = DAG.getConstant(1, DL, VT); 779 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 780 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 781 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 782 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 783 784 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); 785 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); 786 SDValue ShiftRightLo = 787 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); 788 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); 789 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); 790 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); 791 792 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); 793 794 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); 795 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); 796 797 SDValue Parts[2] = {Lo, Hi}; 798 return DAG.getMergeValues(Parts, DL); 799 } 800 801 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, 802 bool IsSRA) const { 803 SDLoc DL(Op); 804 SDValue Lo = Op.getOperand(0); 805 SDValue Hi = Op.getOperand(1); 806 SDValue Shamt = Op.getOperand(2); 807 EVT VT = Lo.getValueType(); 808 809 // SRA expansion: 810 // if Shamt-XLEN < 0: // Shamt < XLEN 811 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 812 // Hi = Hi >>s Shamt 813 // else: 814 // Lo = Hi >>s (Shamt-XLEN); 815 // Hi = Hi >>s (XLEN-1) 816 // 817 // SRL expansion: 818 // if Shamt-XLEN < 0: // Shamt < XLEN 819 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) 820 // Hi = Hi >>u Shamt 821 // else: 822 // Lo = Hi >>u (Shamt-XLEN); 823 // Hi = 0; 824 825 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; 826 827 SDValue Zero = DAG.getConstant(0, DL, VT); 828 SDValue One = DAG.getConstant(1, DL, VT); 829 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); 830 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); 831 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); 832 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); 833 834 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); 835 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); 836 SDValue ShiftLeftHi = 837 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); 838 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); 839 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); 840 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); 841 SDValue HiFalse = 842 IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);
  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  }
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// later on, because the fact that the operation was originally of type i32 is
// lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires that we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, so that redundant sign-extension instructions can be avoided.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Op0 = IsStrict ?
N->getOperand(1) : N->getOperand(0); 930 RTLIB::Libcall LC; 931 if (N->getOpcode() == ISD::FP_TO_SINT || 932 N->getOpcode() == ISD::STRICT_FP_TO_SINT) 933 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); 934 else 935 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); 936 MakeLibCallOptions CallOptions; 937 EVT OpVT = Op0.getValueType(); 938 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); 939 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); 940 SDValue Result; 941 std::tie(Result, Chain) = 942 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain); 943 Results.push_back(Result); 944 if (IsStrict) 945 Results.push_back(Chain); 946 break; 947 } 948 case ISD::READCYCLECOUNTER: { 949 assert(!Subtarget.is64Bit() && 950 "READCYCLECOUNTER only has custom type legalization on riscv32"); 951 952 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 953 SDValue RCW = 954 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); 955 956 Results.push_back( 957 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1))); 958 Results.push_back(RCW.getValue(2)); 959 break; 960 } 961 case ISD::ADD: 962 case ISD::SUB: 963 case ISD::MUL: 964 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 965 "Unexpected custom legalisation"); 966 if (N->getOperand(1).getOpcode() == ISD::Constant) 967 return; 968 Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); 969 break; 970 case ISD::SHL: 971 case ISD::SRA: 972 case ISD::SRL: 973 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 974 "Unexpected custom legalisation"); 975 if (N->getOperand(1).getOpcode() == ISD::Constant) 976 return; 977 Results.push_back(customLegalizeToWOp(N, DAG)); 978 break; 979 case ISD::SDIV: 980 case ISD::UDIV: 981 case ISD::UREM: 982 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 983 Subtarget.hasStdExtM() && "Unexpected custom legalisation"); 984 if (N->getOperand(0).getOpcode() == ISD::Constant || 985 N->getOperand(1).getOpcode() == ISD::Constant) 986 return; 987 Results.push_back(customLegalizeToWOp(N, DAG)); 988 break; 989 case ISD::BITCAST: { 990 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && 991 Subtarget.hasStdExtF() && "Unexpected custom legalisation"); 992 SDLoc DL(N); 993 SDValue Op0 = N->getOperand(0); 994 if (Op0.getValueType() != MVT::f32) 995 return; 996 SDValue FPConv = 997 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); 998 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); 999 break; 1000 } 1001 } 1002 } 1003 1004 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, 1005 DAGCombinerInfo &DCI) const { 1006 SelectionDAG &DAG = DCI.DAG; 1007 1008 switch (N->getOpcode()) { 1009 default: 1010 break; 1011 case RISCVISD::SplitF64: { 1012 SDValue Op0 = N->getOperand(0); 1013 // If the input to SplitF64 is just BuildPairF64 then the operation is 1014 // redundant. Instead, use BuildPairF64's operands directly. 1015 if (Op0->getOpcode() == RISCVISD::BuildPairF64) 1016 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); 1017 1018 SDLoc DL(N); 1019 1020 // It's cheaper to materialise two 32-bit integers than to load a double 1021 // from the constant pool and transfer it to integer registers through the 1022 // stack. 
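    // For example, the f64 constant 1.0 (0x3FF0000000000000) becomes the pair
    // Lo = 0x00000000 and Hi = 0x3FF00000, each of which is materialised
    // directly into a GPR.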
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
      return SDValue();
    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      SDValue AExtOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
      return DCI.CombineTo(N, AExtOp);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST.
It performs the equivalent of: 1080 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 1081 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 1082 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || 1083 !Op0.getNode()->hasOneUse()) 1084 break; 1085 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, 1086 Op0.getOperand(0)); 1087 APInt SignBit = APInt::getSignMask(32).sext(64); 1088 if (Op0.getOpcode() == ISD::FNEG) { 1089 return DCI.CombineTo(N, 1090 DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, 1091 DAG.getConstant(SignBit, DL, MVT::i64))); 1092 } 1093 assert(Op0.getOpcode() == ISD::FABS); 1094 return DCI.CombineTo(N, 1095 DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, 1096 DAG.getConstant(~SignBit, DL, MVT::i64))); 1097 } 1098 } 1099 1100 return SDValue(); 1101 } 1102 1103 bool RISCVTargetLowering::isDesirableToCommuteWithShift( 1104 const SDNode *N, CombineLevel Level) const { 1105 // The following folds are only desirable if `(OP _, c1 << c2)` can be 1106 // materialised in fewer instructions than `(OP _, c1)`: 1107 // 1108 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 1109 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) 1110 SDValue N0 = N->getOperand(0); 1111 EVT Ty = N0.getValueType(); 1112 if (Ty.isScalarInteger() && 1113 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { 1114 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 1115 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1116 if (C1 && C2) { 1117 APInt C1Int = C1->getAPIntValue(); 1118 APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); 1119 1120 // We can materialise `c1 << c2` into an add immediate, so it's "free", 1121 // and the combine should happen, to potentially allow further combines 1122 // later. 1123 if (ShiftedC1Int.getMinSignedBits() <= 64 && 1124 isLegalAddImmediate(ShiftedC1Int.getSExtValue())) 1125 return true; 1126 1127 // We can materialise `c1` in an add immediate, so it's "free", and the 1128 // combine should be prevented. 1129 if (C1Int.getMinSignedBits() <= 64 && 1130 isLegalAddImmediate(C1Int.getSExtValue())) 1131 return false; 1132 1133 // Neither constant will fit into an immediate, so find materialisation 1134 // costs. 1135 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), 1136 Subtarget.is64Bit()); 1137 int ShiftedC1Cost = RISCVMatInt::getIntMatCost( 1138 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); 1139 1140 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the 1141 // combine should be prevented. 1142 if (C1Cost < ShiftedC1Cost) 1143 return false; 1144 } 1145 } 1146 return true; 1147 } 1148 1149 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( 1150 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 1151 unsigned Depth) const { 1152 switch (Op.getOpcode()) { 1153 default: 1154 break; 1155 case RISCVISD::SLLW: 1156 case RISCVISD::SRAW: 1157 case RISCVISD::SRLW: 1158 case RISCVISD::DIVW: 1159 case RISCVISD::DIVUW: 1160 case RISCVISD::REMUW: 1161 // TODO: As the result is sign-extended, this is conservatively correct. A 1162 // more precise answer could be calculated for SRAW depending on known 1163 // bits in the shift amount. 
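    // The 32-bit result of each of these nodes is sign-extended to 64 bits, so
    // bits 63 down to 31 are all copies of bit 31, giving at least 33 sign
    // bits.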
1164 return 33; 1165 } 1166 1167 return 1; 1168 } 1169 1170 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, 1171 MachineBasicBlock *BB) { 1172 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); 1173 1174 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. 1175 // Should the count have wrapped while it was being read, we need to try 1176 // again. 1177 // ... 1178 // read: 1179 // rdcycleh x3 # load high word of cycle 1180 // rdcycle x2 # load low word of cycle 1181 // rdcycleh x4 # load high word of cycle 1182 // bne x3, x4, read # check if high word reads match, otherwise try again 1183 // ... 1184 1185 MachineFunction &MF = *BB->getParent(); 1186 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 1187 MachineFunction::iterator It = ++BB->getIterator(); 1188 1189 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 1190 MF.insert(It, LoopMBB); 1191 1192 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); 1193 MF.insert(It, DoneMBB); 1194 1195 // Transfer the remainder of BB and its successor edges to DoneMBB. 1196 DoneMBB->splice(DoneMBB->begin(), BB, 1197 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 1198 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 1199 1200 BB->addSuccessor(LoopMBB); 1201 1202 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1203 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); 1204 Register LoReg = MI.getOperand(0).getReg(); 1205 Register HiReg = MI.getOperand(1).getReg(); 1206 DebugLoc DL = MI.getDebugLoc(); 1207 1208 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 1209 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) 1210 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 1211 .addReg(RISCV::X0); 1212 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) 1213 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) 1214 .addReg(RISCV::X0); 1215 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) 1216 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) 1217 .addReg(RISCV::X0); 1218 1219 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) 1220 .addReg(HiReg) 1221 .addReg(ReadAgainReg) 1222 .addMBB(LoopMBB); 1223 1224 LoopMBB->addSuccessor(LoopMBB); 1225 LoopMBB->addSuccessor(DoneMBB); 1226 1227 MI.eraseFromParent(); 1228 1229 return DoneMBB; 1230 } 1231 1232 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, 1233 MachineBasicBlock *BB) { 1234 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); 1235 1236 MachineFunction &MF = *BB->getParent(); 1237 DebugLoc DL = MI.getDebugLoc(); 1238 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1239 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 1240 Register LoReg = MI.getOperand(0).getReg(); 1241 Register HiReg = MI.getOperand(1).getReg(); 1242 Register SrcReg = MI.getOperand(2).getReg(); 1243 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; 1244 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 1245 1246 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC, 1247 RI); 1248 MachineMemOperand *MMO = 1249 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 1250 MachineMemOperand::MOLoad, 8, Align(8)); 1251 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg) 1252 .addFrameIndex(FI) 1253 .addImm(0) 1254 .addMemOperand(MMO); 1255 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg) 1256 .addFrameIndex(FI) 1257 .addImm(4) 1258 
.addMemOperand(MMO); 1259 MI.eraseFromParent(); // The pseudo instruction is gone now. 1260 return BB; 1261 } 1262 1263 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, 1264 MachineBasicBlock *BB) { 1265 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo && 1266 "Unexpected instruction"); 1267 1268 MachineFunction &MF = *BB->getParent(); 1269 DebugLoc DL = MI.getDebugLoc(); 1270 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1271 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); 1272 Register DstReg = MI.getOperand(0).getReg(); 1273 Register LoReg = MI.getOperand(1).getReg(); 1274 Register HiReg = MI.getOperand(2).getReg(); 1275 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; 1276 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); 1277 1278 MachineMemOperand *MMO = 1279 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), 1280 MachineMemOperand::MOStore, 8, Align(8)); 1281 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 1282 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill())) 1283 .addFrameIndex(FI) 1284 .addImm(0) 1285 .addMemOperand(MMO); 1286 BuildMI(*BB, MI, DL, TII.get(RISCV::SW)) 1287 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill())) 1288 .addFrameIndex(FI) 1289 .addImm(4) 1290 .addMemOperand(MMO); 1291 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI); 1292 MI.eraseFromParent(); // The pseudo instruction is gone now. 1293 return BB; 1294 } 1295 1296 static bool isSelectPseudo(MachineInstr &MI) { 1297 switch (MI.getOpcode()) { 1298 default: 1299 return false; 1300 case RISCV::Select_GPR_Using_CC_GPR: 1301 case RISCV::Select_FPR32_Using_CC_GPR: 1302 case RISCV::Select_FPR64_Using_CC_GPR: 1303 return true; 1304 } 1305 } 1306 1307 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, 1308 MachineBasicBlock *BB) { 1309 // To "insert" Select_* instructions, we actually have to insert the triangle 1310 // control-flow pattern. The incoming instructions know the destination vreg 1311 // to set, the condition code register to branch on, the true/false values to 1312 // select between, and the condcode to use to select the appropriate branch. 1313 // 1314 // We produce the following control flow: 1315 // HeadMBB 1316 // | \ 1317 // | IfFalseMBB 1318 // | / 1319 // TailMBB 1320 // 1321 // When we find a sequence of selects we attempt to optimize their emission 1322 // by sharing the control flow. Currently we only handle cases where we have 1323 // multiple selects with the exact same condition (same LHS, RHS and CC). 1324 // The selects may be interleaved with other instructions if the other 1325 // instructions meet some requirements we deem safe: 1326 // - They are debug instructions. Otherwise, 1327 // - They do not have side-effects, do not access memory and their inputs do 1328 // not depend on the results of the select pseudo-instructions. 1329 // The TrueV/FalseV operands of the selects cannot depend on the result of 1330 // previous selects in the sequence. 1331 // These conditions could be further relaxed. See the X86 target for a 1332 // related approach and more information. 
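  // For example, two selects guarded by the same (setcc lhs, rhs, cc) share a
  // single conditional branch in HeadMBB and produce one PHI per select in
  // TailMBB.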
1333 Register LHS = MI.getOperand(1).getReg(); 1334 Register RHS = MI.getOperand(2).getReg(); 1335 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); 1336 1337 SmallVector<MachineInstr *, 4> SelectDebugValues; 1338 SmallSet<Register, 4> SelectDests; 1339 SelectDests.insert(MI.getOperand(0).getReg()); 1340 1341 MachineInstr *LastSelectPseudo = &MI; 1342 1343 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); 1344 SequenceMBBI != E; ++SequenceMBBI) { 1345 if (SequenceMBBI->isDebugInstr()) 1346 continue; 1347 else if (isSelectPseudo(*SequenceMBBI)) { 1348 if (SequenceMBBI->getOperand(1).getReg() != LHS || 1349 SequenceMBBI->getOperand(2).getReg() != RHS || 1350 SequenceMBBI->getOperand(3).getImm() != CC || 1351 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || 1352 SelectDests.count(SequenceMBBI->getOperand(5).getReg())) 1353 break; 1354 LastSelectPseudo = &*SequenceMBBI; 1355 SequenceMBBI->collectDebugValues(SelectDebugValues); 1356 SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); 1357 } else { 1358 if (SequenceMBBI->hasUnmodeledSideEffects() || 1359 SequenceMBBI->mayLoadOrStore()) 1360 break; 1361 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { 1362 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); 1363 })) 1364 break; 1365 } 1366 } 1367 1368 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); 1369 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 1370 DebugLoc DL = MI.getDebugLoc(); 1371 MachineFunction::iterator I = ++BB->getIterator(); 1372 1373 MachineBasicBlock *HeadMBB = BB; 1374 MachineFunction *F = BB->getParent(); 1375 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB); 1376 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB); 1377 1378 F->insert(I, IfFalseMBB); 1379 F->insert(I, TailMBB); 1380 1381 // Transfer debug instructions associated with the selects to TailMBB. 1382 for (MachineInstr *DebugInstr : SelectDebugValues) { 1383 TailMBB->push_back(DebugInstr->removeFromParent()); 1384 } 1385 1386 // Move all instructions after the sequence to TailMBB. 1387 TailMBB->splice(TailMBB->end(), HeadMBB, 1388 std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); 1389 // Update machine-CFG edges by transferring all successors of the current 1390 // block to the new block which will contain the Phi nodes for the selects. 1391 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); 1392 // Set the successors for HeadMBB. 1393 HeadMBB->addSuccessor(IfFalseMBB); 1394 HeadMBB->addSuccessor(TailMBB); 1395 1396 // Insert appropriate branch. 1397 unsigned Opcode = getBranchOpcodeForIntCondCode(CC); 1398 1399 BuildMI(HeadMBB, DL, TII.get(Opcode)) 1400 .addReg(LHS) 1401 .addReg(RHS) 1402 .addMBB(TailMBB); 1403 1404 // IfFalseMBB just falls through to TailMBB. 1405 IfFalseMBB->addSuccessor(TailMBB); 1406 1407 // Create PHIs for all of the select pseudo-instructions. 
auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}

MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
    return emitReadCycleWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return emitSelectPseudo(MI, BB);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB);
  }
}

// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer-term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hard-float calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
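// These tables list the argument registers of the standard calling
// convention: a0-a7 (X10-X17) for integer values and fa0-fa7 for
// floating-point values.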
1475 1476 static const MCPhysReg ArgGPRs[] = { 1477 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, 1478 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 1479 }; 1480 static const MCPhysReg ArgFPR32s[] = { 1481 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, 1482 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F 1483 }; 1484 static const MCPhysReg ArgFPR64s[] = { 1485 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, 1486 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D 1487 }; 1488 1489 // Pass a 2*XLEN argument that has been split into two XLEN values through 1490 // registers or the stack as necessary. 1491 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, 1492 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, 1493 MVT ValVT2, MVT LocVT2, 1494 ISD::ArgFlagsTy ArgFlags2) { 1495 unsigned XLenInBytes = XLen / 8; 1496 if (Register Reg = State.AllocateReg(ArgGPRs)) { 1497 // At least one half can be passed via register. 1498 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, 1499 VA1.getLocVT(), CCValAssign::Full)); 1500 } else { 1501 // Both halves must be passed on the stack, with proper alignment. 1502 Align StackAlign = 1503 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); 1504 State.addLoc( 1505 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), 1506 State.AllocateStack(XLenInBytes, StackAlign), 1507 VA1.getLocVT(), CCValAssign::Full)); 1508 State.addLoc(CCValAssign::getMem( 1509 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 1510 LocVT2, CCValAssign::Full)); 1511 return false; 1512 } 1513 1514 if (Register Reg = State.AllocateReg(ArgGPRs)) { 1515 // The second half can also be passed via register. 1516 State.addLoc( 1517 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); 1518 } else { 1519 // The second half is passed via the stack, without additional alignment. 1520 State.addLoc(CCValAssign::getMem( 1521 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)), 1522 LocVT2, CCValAssign::Full)); 1523 } 1524 1525 return false; 1526 } 1527 1528 // Implements the RISC-V calling convention. Returns true upon failure. 1529 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, 1530 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, 1531 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, 1532 bool IsRet, Type *OrigTy) { 1533 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); 1534 assert(XLen == 32 || XLen == 64); 1535 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; 1536 1537 // Any return value split in to more than two values can't be returned 1538 // directly. 1539 if (IsRet && ValNo > 1) 1540 return true; 1541 1542 // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a 1543 // variadic argument, or if no F32 argument registers are available. 1544 bool UseGPRForF32 = true; 1545 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a 1546 // variadic argument, or if no F64 argument registers are available. 
1547 bool UseGPRForF64 = true; 1548 1549 switch (ABI) { 1550 default: 1551 llvm_unreachable("Unexpected ABI"); 1552 case RISCVABI::ABI_ILP32: 1553 case RISCVABI::ABI_LP64: 1554 break; 1555 case RISCVABI::ABI_ILP32F: 1556 case RISCVABI::ABI_LP64F: 1557 UseGPRForF32 = !IsFixed; 1558 break; 1559 case RISCVABI::ABI_ILP32D: 1560 case RISCVABI::ABI_LP64D: 1561 UseGPRForF32 = !IsFixed; 1562 UseGPRForF64 = !IsFixed; 1563 break; 1564 } 1565 1566 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) 1567 UseGPRForF32 = true; 1568 if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s)) 1569 UseGPRForF64 = true; 1570 1571 // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local 1572 // variables rather than directly checking against the target ABI. 1573 1574 if (UseGPRForF32 && ValVT == MVT::f32) { 1575 LocVT = XLenVT; 1576 LocInfo = CCValAssign::BCvt; 1577 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { 1578 LocVT = MVT::i64; 1579 LocInfo = CCValAssign::BCvt; 1580 } 1581 1582 // If this is a variadic argument, the RISC-V calling convention requires 1583 // that it is assigned an 'even' or 'aligned' register if it has 8-byte 1584 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should 1585 // be used regardless of whether the original argument was split during 1586 // legalisation or not. The argument will not be passed by registers if the 1587 // original type is larger than 2*XLEN, so the register alignment rule does 1588 // not apply. 1589 unsigned TwoXLenInBytes = (2 * XLen) / 8; 1590 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && 1591 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { 1592 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); 1593 // Skip 'odd' register if necessary. 1594 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) 1595 State.AllocateReg(ArgGPRs); 1596 } 1597 1598 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); 1599 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = 1600 State.getPendingArgFlags(); 1601 1602 assert(PendingLocs.size() == PendingArgFlags.size() && 1603 "PendingLocs and PendingArgFlags out of sync"); 1604 1605 // Handle passing f64 on RV32D with a soft float ABI or when floating point 1606 // registers are exhausted. 1607 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { 1608 assert(!ArgFlags.isSplit() && PendingLocs.empty() && 1609 "Can't lower f64 if it is split"); 1610 // Depending on available argument GPRS, f64 may be passed in a pair of 1611 // GPRs, split between a GPR and the stack, or passed completely on the 1612 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these 1613 // cases. 1614 Register Reg = State.AllocateReg(ArgGPRs); 1615 LocVT = MVT::i32; 1616 if (!Reg) { 1617 unsigned StackOffset = State.AllocateStack(8, Align(8)); 1618 State.addLoc( 1619 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 1620 return false; 1621 } 1622 if (!State.AllocateReg(ArgGPRs)) 1623 State.AllocateStack(4, Align(4)); 1624 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 1625 return false; 1626 } 1627 1628 // Split arguments might be passed indirectly, so keep track of the pending 1629 // values. 
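// For example (illustrative): an i128 argument on RV32 is legalised into four
// i32 pieces, so it cannot take the two-XLEN path further below. Every pending
// piece is instead marked CCValAssign::Indirect, and LowerCall /
// LowerFormalArguments spill and reload the pieces through a single stack
// object whose address is what is actually passed.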
1630 if (ArgFlags.isSplit() || !PendingLocs.empty()) { 1631 LocVT = XLenVT; 1632 LocInfo = CCValAssign::Indirect; 1633 PendingLocs.push_back( 1634 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 1635 PendingArgFlags.push_back(ArgFlags); 1636 if (!ArgFlags.isSplitEnd()) { 1637 return false; 1638 } 1639 } 1640 1641 // If the split argument only had two elements, it should be passed directly 1642 // in registers or on the stack. 1643 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { 1644 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); 1645 // Apply the normal calling convention rules to the first half of the 1646 // split argument. 1647 CCValAssign VA = PendingLocs[0]; 1648 ISD::ArgFlagsTy AF = PendingArgFlags[0]; 1649 PendingLocs.clear(); 1650 PendingArgFlags.clear(); 1651 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, 1652 ArgFlags); 1653 } 1654 1655 // Allocate to a register if possible, or else a stack slot. 1656 Register Reg; 1657 if (ValVT == MVT::f32 && !UseGPRForF32) 1658 Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s); 1659 else if (ValVT == MVT::f64 && !UseGPRForF64) 1660 Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s); 1661 else 1662 Reg = State.AllocateReg(ArgGPRs); 1663 unsigned StackOffset = 1664 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8)); 1665 1666 // If we reach this point and PendingLocs is non-empty, we must be at the 1667 // end of a split argument that must be passed indirectly. 1668 if (!PendingLocs.empty()) { 1669 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); 1670 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); 1671 1672 for (auto &It : PendingLocs) { 1673 if (Reg) 1674 It.convertToReg(Reg); 1675 else 1676 It.convertToMem(StackOffset); 1677 State.addLoc(It); 1678 } 1679 PendingLocs.clear(); 1680 PendingArgFlags.clear(); 1681 return false; 1682 } 1683 1684 assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) && 1685 "Expected an XLenVT at this stage"); 1686 1687 if (Reg) { 1688 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 1689 return false; 1690 } 1691 1692 // When an f32 or f64 is passed on the stack, no bit-conversion is needed. 
1693 if (ValVT == MVT::f32 || ValVT == MVT::f64) { 1694 LocVT = ValVT; 1695 LocInfo = CCValAssign::Full; 1696 } 1697 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); 1698 return false; 1699 } 1700 1701 void RISCVTargetLowering::analyzeInputArgs( 1702 MachineFunction &MF, CCState &CCInfo, 1703 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const { 1704 unsigned NumArgs = Ins.size(); 1705 FunctionType *FType = MF.getFunction().getFunctionType(); 1706 1707 for (unsigned i = 0; i != NumArgs; ++i) { 1708 MVT ArgVT = Ins[i].VT; 1709 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; 1710 1711 Type *ArgTy = nullptr; 1712 if (IsRet) 1713 ArgTy = FType->getReturnType(); 1714 else if (Ins[i].isOrigArg()) 1715 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); 1716 1717 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 1718 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 1719 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { 1720 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " 1721 << EVT(ArgVT).getEVTString() << '\n'); 1722 llvm_unreachable(nullptr); 1723 } 1724 } 1725 } 1726 1727 void RISCVTargetLowering::analyzeOutputArgs( 1728 MachineFunction &MF, CCState &CCInfo, 1729 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, 1730 CallLoweringInfo *CLI) const { 1731 unsigned NumArgs = Outs.size(); 1732 1733 for (unsigned i = 0; i != NumArgs; i++) { 1734 MVT ArgVT = Outs[i].VT; 1735 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 1736 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; 1737 1738 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 1739 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, 1740 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { 1741 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " 1742 << EVT(ArgVT).getEVTString() << "\n"); 1743 llvm_unreachable(nullptr); 1744 } 1745 } 1746 } 1747 1748 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect 1749 // values. 1750 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, 1751 const CCValAssign &VA, const SDLoc &DL) { 1752 switch (VA.getLocInfo()) { 1753 default: 1754 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1755 case CCValAssign::Full: 1756 break; 1757 case CCValAssign::BCvt: 1758 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 1759 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); 1760 break; 1761 } 1762 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); 1763 break; 1764 } 1765 return Val; 1766 } 1767 1768 // The caller is responsible for loading the full value if the argument is 1769 // passed with CCValAssign::Indirect. 
1770 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, 1771 const CCValAssign &VA, const SDLoc &DL) { 1772 MachineFunction &MF = DAG.getMachineFunction(); 1773 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1774 EVT LocVT = VA.getLocVT(); 1775 SDValue Val; 1776 const TargetRegisterClass *RC; 1777 1778 switch (LocVT.getSimpleVT().SimpleTy) { 1779 default: 1780 llvm_unreachable("Unexpected register type"); 1781 case MVT::i32: 1782 case MVT::i64: 1783 RC = &RISCV::GPRRegClass; 1784 break; 1785 case MVT::f32: 1786 RC = &RISCV::FPR32RegClass; 1787 break; 1788 case MVT::f64: 1789 RC = &RISCV::FPR64RegClass; 1790 break; 1791 } 1792 1793 Register VReg = RegInfo.createVirtualRegister(RC); 1794 RegInfo.addLiveIn(VA.getLocReg(), VReg); 1795 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1796 1797 if (VA.getLocInfo() == CCValAssign::Indirect) 1798 return Val; 1799 1800 return convertLocVTToValVT(DAG, Val, VA, DL); 1801 } 1802 1803 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, 1804 const CCValAssign &VA, const SDLoc &DL) { 1805 EVT LocVT = VA.getLocVT(); 1806 1807 switch (VA.getLocInfo()) { 1808 default: 1809 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1810 case CCValAssign::Full: 1811 break; 1812 case CCValAssign::BCvt: 1813 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { 1814 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); 1815 break; 1816 } 1817 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); 1818 break; 1819 } 1820 return Val; 1821 } 1822 1823 // The caller is responsible for loading the full value if the argument is 1824 // passed with CCValAssign::Indirect. 1825 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, 1826 const CCValAssign &VA, const SDLoc &DL) { 1827 MachineFunction &MF = DAG.getMachineFunction(); 1828 MachineFrameInfo &MFI = MF.getFrameInfo(); 1829 EVT LocVT = VA.getLocVT(); 1830 EVT ValVT = VA.getValVT(); 1831 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); 1832 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, 1833 VA.getLocMemOffset(), /*Immutable=*/true); 1834 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1835 SDValue Val; 1836 1837 ISD::LoadExtType ExtType; 1838 switch (VA.getLocInfo()) { 1839 default: 1840 llvm_unreachable("Unexpected CCValAssign::LocInfo"); 1841 case CCValAssign::Full: 1842 case CCValAssign::Indirect: 1843 case CCValAssign::BCvt: 1844 ExtType = ISD::NON_EXTLOAD; 1845 break; 1846 } 1847 Val = DAG.getExtLoad( 1848 ExtType, DL, LocVT, Chain, FIN, 1849 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); 1850 return Val; 1851 } 1852 1853 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, 1854 const CCValAssign &VA, const SDLoc &DL) { 1855 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && 1856 "Unexpected VA"); 1857 MachineFunction &MF = DAG.getMachineFunction(); 1858 MachineFrameInfo &MFI = MF.getFrameInfo(); 1859 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1860 1861 if (VA.isMemLoc()) { 1862 // f64 is passed on the stack. 
1863 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
1864 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1865 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
1866 MachinePointerInfo::getFixedStack(MF, FI));
1867 }
1868 
1869 assert(VA.isRegLoc() && "Expected register VA assignment");
1870 
1871 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1872 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1873 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1874 SDValue Hi;
1875 if (VA.getLocReg() == RISCV::X17) {
1876 // Second half of f64 is passed on the stack.
1877 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1878 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1879 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1880 MachinePointerInfo::getFixedStack(MF, FI));
1881 } else {
1882 // Second half of f64 is passed in another GPR.
1883 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1884 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1885 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1886 }
1887 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1888 }
1889 
1890 // FastCC has less than 1% performance improvement for some particular
1891 // benchmark. But theoretically, it may have a benefit for some cases.
1892 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
1893 CCValAssign::LocInfo LocInfo,
1894 ISD::ArgFlagsTy ArgFlags, CCState &State) {
1895 
1896 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
1897 // X5 and X6 might be used for save-restore libcall.
1898 static const MCPhysReg GPRList[] = {
1899 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
1900 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
1901 RISCV::X29, RISCV::X30, RISCV::X31};
1902 if (unsigned Reg = State.AllocateReg(GPRList)) {
1903 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1904 return false;
1905 }
1906 }
1907 
1908 if (LocVT == MVT::f32) {
1909 static const MCPhysReg FPR32List[] = {
1910 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
1911 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
1912 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
1913 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
1914 if (unsigned Reg = State.AllocateReg(FPR32List)) {
1915 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1916 return false;
1917 }
1918 }
1919 
1920 if (LocVT == MVT::f64) {
1921 static const MCPhysReg FPR64List[] = {
1922 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
1923 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
1924 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
1925 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
1926 if (unsigned Reg = State.AllocateReg(FPR64List)) {
1927 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1928 return false;
1929 }
1930 }
1931 
1932 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
1933 unsigned Offset4 = State.AllocateStack(4, Align(4));
1934 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
1935 return false;
1936 }
1937 
1938 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
1939 unsigned Offset5 = State.AllocateStack(8, Align(8));
1940 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
1941 return false;
1942 }
1943 
1944 return true; // CC didn't match.
1945 }
1946 
1947 // Transform physical registers into virtual registers.
1948 SDValue RISCVTargetLowering::LowerFormalArguments(
1949 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1950 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1951 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1952 
1953 switch (CallConv) {
1954 default:
1955 report_fatal_error("Unsupported calling convention");
1956 case CallingConv::C:
1957 case CallingConv::Fast:
1958 break;
1959 }
1960 
1961 MachineFunction &MF = DAG.getMachineFunction();
1962 
1963 const Function &Func = MF.getFunction();
1964 if (Func.hasFnAttribute("interrupt")) {
1965 if (!Func.arg_empty())
1966 report_fatal_error(
1967 "Functions with the interrupt attribute cannot have arguments!");
1968 
1969 StringRef Kind =
1970 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1971 
1972 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1973 report_fatal_error(
1974 "Function interrupt attribute argument not supported!");
1975 }
1976 
1977 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1978 MVT XLenVT = Subtarget.getXLenVT();
1979 unsigned XLenInBytes = Subtarget.getXLen() / 8;
1980 // Used with varargs to accumulate store chains.
1981 std::vector<SDValue> OutChains;
1982 
1983 // Assign locations to all of the incoming arguments.
1984 SmallVector<CCValAssign, 16> ArgLocs;
1985 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1986 
1987 if (CallConv == CallingConv::Fast)
1988 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
1989 else
1990 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1991 
1992 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1993 CCValAssign &VA = ArgLocs[i];
1994 SDValue ArgValue;
1995 // Passing f64 on RV32D with a soft float ABI must be handled as a special
1996 // case.
1997 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1998 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1999 else if (VA.isRegLoc())
2000 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
2001 else
2002 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2003 
2004 if (VA.getLocInfo() == CCValAssign::Indirect) {
2005 // If the original argument was split and passed by reference (e.g. i128
2006 // on RV32), we need to load all parts of it here (using the same
2007 // address).
2008 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2009 MachinePointerInfo()));
2010 unsigned ArgIndex = Ins[i].OrigArgIndex;
2011 assert(Ins[i].PartOffset == 0);
2012 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2013 CCValAssign &PartVA = ArgLocs[i + 1];
2014 unsigned PartOffset = Ins[i + 1].PartOffset;
2015 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2016 DAG.getIntPtrConstant(PartOffset, DL));
2017 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2018 MachinePointerInfo()));
2019 ++i;
2020 }
2021 continue;
2022 }
2023 InVals.push_back(ArgValue);
2024 }
2025 
2026 if (IsVarArg) {
2027 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
2028 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2029 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
2030 MachineFrameInfo &MFI = MF.getFrameInfo();
2031 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2032 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2033 
2034 // Offset of the first variable argument from stack pointer, and size of
2035 // the vararg save area.
For now, the varargs save area is either zero or
2036 // large enough to hold a0-a7.
2037 int VaArgOffset, VarArgsSaveSize;
2038 
2039 // If all registers are allocated, then all varargs must be passed on the
2040 // stack and we don't need to save any argregs.
2041 if (ArgRegs.size() == Idx) {
2042 VaArgOffset = CCInfo.getNextStackOffset();
2043 VarArgsSaveSize = 0;
2044 } else {
2045 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
2046 VaArgOffset = -VarArgsSaveSize;
2047 }
2048 
2049 // Record the frame index of the first variable argument
2050 // which is a value necessary to VASTART.
2051 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2052 RVFI->setVarArgsFrameIndex(FI);
2053 
2054 // If saving an odd number of registers then create an extra stack slot to
2055 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
2056 // offsets to even-numbered registers remain 2*XLEN-aligned.
2057 if (Idx % 2) {
2058 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
2059 VarArgsSaveSize += XLenInBytes;
2060 }
2061 
2062 // Copy the integer registers that may have been used for passing varargs
2063 // to the vararg save area.
2064 for (unsigned I = Idx; I < ArgRegs.size();
2065 ++I, VaArgOffset += XLenInBytes) {
2066 const Register Reg = RegInfo.createVirtualRegister(RC);
2067 RegInfo.addLiveIn(ArgRegs[I], Reg);
2068 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
2069 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2070 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2071 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2072 MachinePointerInfo::getFixedStack(MF, FI));
2073 cast<StoreSDNode>(Store.getNode())
2074 ->getMemOperand()
2075 ->setValue((Value *)nullptr);
2076 OutChains.push_back(Store);
2077 }
2078 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
2079 }
2080 
2081 // All stores are grouped in one node to allow the matching between
2082 // the size of Ins and InVals. This only happens for vararg functions.
2083 if (!OutChains.empty()) {
2084 OutChains.push_back(Chain);
2085 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2086 }
2087 
2088 return Chain;
2089 }
2090 
2091 /// isEligibleForTailCallOptimization - Check whether the call is eligible
2092 /// for tail call optimization.
2093 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
2094 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2095 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2096 const SmallVector<CCValAssign, 16> &ArgLocs) const {
2097 
2098 auto &Callee = CLI.Callee;
2099 auto CalleeCC = CLI.CallConv;
2100 auto &Outs = CLI.Outs;
2101 auto &Caller = MF.getFunction();
2102 auto CallerCC = Caller.getCallingConv();
2103 
2104 // Exception-handling functions need a special set of instructions to
2105 // indicate a return to the hardware. Tail-calling another function would
2106 // probably break this.
2107 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
2108 // should be expanded as new function attributes are introduced.
2109 if (Caller.hasFnAttribute("interrupt"))
2110 return false;
2111 
2112 // Do not tail call opt if the stack is used to pass parameters.
2113 if (CCInfo.getNextStackOffset() != 0)
2114 return false;
2115 
2116 // Do not tail call opt if any parameters need to be passed indirectly.
2117 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
2118 // passed indirectly.
So the address of the value will be passed in a 2119 // register, or if not available, then the address is put on the stack. In 2120 // order to pass indirectly, space on the stack often needs to be allocated 2121 // in order to store the value. In this case the CCInfo.getNextStackOffset() 2122 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs 2123 // are passed CCValAssign::Indirect. 2124 for (auto &VA : ArgLocs) 2125 if (VA.getLocInfo() == CCValAssign::Indirect) 2126 return false; 2127 2128 // Do not tail call opt if either caller or callee uses struct return 2129 // semantics. 2130 auto IsCallerStructRet = Caller.hasStructRetAttr(); 2131 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); 2132 if (IsCallerStructRet || IsCalleeStructRet) 2133 return false; 2134 2135 // Externally-defined functions with weak linkage should not be 2136 // tail-called. The behaviour of branch instructions in this situation (as 2137 // used for tail calls) is implementation-defined, so we cannot rely on the 2138 // linker replacing the tail call with a return. 2139 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2140 const GlobalValue *GV = G->getGlobal(); 2141 if (GV->hasExternalWeakLinkage()) 2142 return false; 2143 } 2144 2145 // The callee has to preserve all registers the caller needs to preserve. 2146 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2147 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); 2148 if (CalleeCC != CallerCC) { 2149 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); 2150 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) 2151 return false; 2152 } 2153 2154 // Byval parameters hand the function a pointer directly into the stack area 2155 // we want to reuse during a tail call. Working around this *is* possible 2156 // but less efficient and uglier in LowerCall. 2157 for (auto &Arg : Outs) 2158 if (Arg.Flags.isByVal()) 2159 return false; 2160 2161 return true; 2162 } 2163 2164 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input 2165 // and output parameter nodes. 2166 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, 2167 SmallVectorImpl<SDValue> &InVals) const { 2168 SelectionDAG &DAG = CLI.DAG; 2169 SDLoc &DL = CLI.DL; 2170 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2171 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2172 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2173 SDValue Chain = CLI.Chain; 2174 SDValue Callee = CLI.Callee; 2175 bool &IsTailCall = CLI.IsTailCall; 2176 CallingConv::ID CallConv = CLI.CallConv; 2177 bool IsVarArg = CLI.IsVarArg; 2178 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2179 MVT XLenVT = Subtarget.getXLenVT(); 2180 2181 MachineFunction &MF = DAG.getMachineFunction(); 2182 2183 // Analyze the operands of the call, assigning locations to each operand. 2184 SmallVector<CCValAssign, 16> ArgLocs; 2185 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 2186 2187 if (CallConv == CallingConv::Fast) 2188 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); 2189 else 2190 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); 2191 2192 // Check if it's really possible to do a tail call. 
2193 if (IsTailCall) 2194 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); 2195 2196 if (IsTailCall) 2197 ++NumTailCalls; 2198 else if (CLI.CB && CLI.CB->isMustTailCall()) 2199 report_fatal_error("failed to perform tail call elimination on a call " 2200 "site marked musttail"); 2201 2202 // Get a count of how many bytes are to be pushed on the stack. 2203 unsigned NumBytes = ArgCCInfo.getNextStackOffset(); 2204 2205 // Create local copies for byval args 2206 SmallVector<SDValue, 8> ByValArgs; 2207 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2208 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2209 if (!Flags.isByVal()) 2210 continue; 2211 2212 SDValue Arg = OutVals[i]; 2213 unsigned Size = Flags.getByValSize(); 2214 Align Alignment = Flags.getNonZeroByValAlign(); 2215 2216 int FI = 2217 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false); 2218 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 2219 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); 2220 2221 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, 2222 /*IsVolatile=*/false, 2223 /*AlwaysInline=*/false, IsTailCall, 2224 MachinePointerInfo(), MachinePointerInfo()); 2225 ByValArgs.push_back(FIPtr); 2226 } 2227 2228 if (!IsTailCall) 2229 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); 2230 2231 // Copy argument values to their designated locations. 2232 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; 2233 SmallVector<SDValue, 8> MemOpChains; 2234 SDValue StackPtr; 2235 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { 2236 CCValAssign &VA = ArgLocs[i]; 2237 SDValue ArgValue = OutVals[i]; 2238 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2239 2240 // Handle passing f64 on RV32D with a soft float ABI as a special case. 2241 bool IsF64OnRV32DSoftABI = 2242 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64; 2243 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) { 2244 SDValue SplitF64 = DAG.getNode( 2245 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue); 2246 SDValue Lo = SplitF64.getValue(0); 2247 SDValue Hi = SplitF64.getValue(1); 2248 2249 Register RegLo = VA.getLocReg(); 2250 RegsToPass.push_back(std::make_pair(RegLo, Lo)); 2251 2252 if (RegLo == RISCV::X17) { 2253 // Second half of f64 is passed on the stack. 2254 // Work out the address of the stack slot. 2255 if (!StackPtr.getNode()) 2256 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 2257 // Emit the store. 2258 MemOpChains.push_back( 2259 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); 2260 } else { 2261 // Second half of f64 is passed in another GPR. 2262 assert(RegLo < RISCV::X31 && "Invalid register pair"); 2263 Register RegHigh = RegLo + 1; 2264 RegsToPass.push_back(std::make_pair(RegHigh, Hi)); 2265 } 2266 continue; 2267 } 2268 2269 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way 2270 // as any other MemLoc. 2271 2272 // Promote the value if needed. 2273 // For now, only handle fully promoted and indirect arguments. 2274 if (VA.getLocInfo() == CCValAssign::Indirect) { 2275 // Store the argument in a stack slot and pass its address. 2276 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); 2277 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 2278 MemOpChains.push_back( 2279 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 2280 MachinePointerInfo::getFixedStack(MF, FI))); 2281 // If the original argument was split (e.g. 
i128), we need 2282 // to store all parts of it here (and pass just one address). 2283 unsigned ArgIndex = Outs[i].OrigArgIndex; 2284 assert(Outs[i].PartOffset == 0); 2285 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { 2286 SDValue PartValue = OutVals[i + 1]; 2287 unsigned PartOffset = Outs[i + 1].PartOffset; 2288 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 2289 DAG.getIntPtrConstant(PartOffset, DL)); 2290 MemOpChains.push_back( 2291 DAG.getStore(Chain, DL, PartValue, Address, 2292 MachinePointerInfo::getFixedStack(MF, FI))); 2293 ++i; 2294 } 2295 ArgValue = SpillSlot; 2296 } else { 2297 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL); 2298 } 2299 2300 // Use local copy if it is a byval arg. 2301 if (Flags.isByVal()) 2302 ArgValue = ByValArgs[j++]; 2303 2304 if (VA.isRegLoc()) { 2305 // Queue up the argument copies and emit them at the end. 2306 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 2307 } else { 2308 assert(VA.isMemLoc() && "Argument not register or memory"); 2309 assert(!IsTailCall && "Tail call not allowed if stack is used " 2310 "for passing parameters"); 2311 2312 // Work out the address of the stack slot. 2313 if (!StackPtr.getNode()) 2314 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); 2315 SDValue Address = 2316 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 2317 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); 2318 2319 // Emit the store. 2320 MemOpChains.push_back( 2321 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 2322 } 2323 } 2324 2325 // Join the stores, which are independent of one another. 2326 if (!MemOpChains.empty()) 2327 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 2328 2329 SDValue Glue; 2330 2331 // Build a sequence of copy-to-reg nodes, chained and glued together. 2332 for (auto &Reg : RegsToPass) { 2333 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); 2334 Glue = Chain.getValue(1); 2335 } 2336 2337 // Validate that none of the argument registers have been marked as 2338 // reserved, if so report an error. Do the same for the return address if this 2339 // is not a tailcall. 2340 validateCCReservedRegs(RegsToPass, MF); 2341 if (!IsTailCall && 2342 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1)) 2343 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 2344 MF.getFunction(), 2345 "Return address register required, but has been reserved."}); 2346 2347 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a 2348 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't 2349 // split it and then direct call can be matched by PseudoCALL. 2350 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { 2351 const GlobalValue *GV = S->getGlobal(); 2352 2353 unsigned OpFlags = RISCVII::MO_CALL; 2354 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) 2355 OpFlags = RISCVII::MO_PLT; 2356 2357 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); 2358 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2359 unsigned OpFlags = RISCVII::MO_CALL; 2360 2361 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), 2362 nullptr)) 2363 OpFlags = RISCVII::MO_PLT; 2364 2365 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); 2366 } 2367 2368 // The first call operand is the chain and the second is the target address. 
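// (The operands appended below are, in order: the registers carrying argument
// values, a register mask describing the call-preserved registers when this is
// not a tail call, and finally the glue from the argument copies, if any.)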
2369 SmallVector<SDValue, 8> Ops; 2370 Ops.push_back(Chain); 2371 Ops.push_back(Callee); 2372 2373 // Add argument registers to the end of the list so that they are 2374 // known live into the call. 2375 for (auto &Reg : RegsToPass) 2376 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); 2377 2378 if (!IsTailCall) { 2379 // Add a register mask operand representing the call-preserved registers. 2380 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2381 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 2382 assert(Mask && "Missing call preserved mask for calling convention"); 2383 Ops.push_back(DAG.getRegisterMask(Mask)); 2384 } 2385 2386 // Glue the call to the argument copies, if any. 2387 if (Glue.getNode()) 2388 Ops.push_back(Glue); 2389 2390 // Emit the call. 2391 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2392 2393 if (IsTailCall) { 2394 MF.getFrameInfo().setHasTailCall(); 2395 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops); 2396 } 2397 2398 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops); 2399 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2400 Glue = Chain.getValue(1); 2401 2402 // Mark the end of the call, which is glued to the call itself. 2403 Chain = DAG.getCALLSEQ_END(Chain, 2404 DAG.getConstant(NumBytes, DL, PtrVT, true), 2405 DAG.getConstant(0, DL, PtrVT, true), 2406 Glue, DL); 2407 Glue = Chain.getValue(1); 2408 2409 // Assign locations to each value returned by this call. 2410 SmallVector<CCValAssign, 16> RVLocs; 2411 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); 2412 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); 2413 2414 // Copy all of the result registers out of their specified physreg. 2415 for (auto &VA : RVLocs) { 2416 // Copy the value out 2417 SDValue RetValue = 2418 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); 2419 // Glue the RetValue to the end of the call sequence 2420 Chain = RetValue.getValue(1); 2421 Glue = RetValue.getValue(2); 2422 2423 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 2424 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment"); 2425 SDValue RetValue2 = 2426 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue); 2427 Chain = RetValue2.getValue(1); 2428 Glue = RetValue2.getValue(2); 2429 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue, 2430 RetValue2); 2431 } 2432 2433 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL); 2434 2435 InVals.push_back(RetValue); 2436 } 2437 2438 return Chain; 2439 } 2440 2441 bool RISCVTargetLowering::CanLowerReturn( 2442 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, 2443 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { 2444 SmallVector<CCValAssign, 16> RVLocs; 2445 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); 2446 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2447 MVT VT = Outs[i].VT; 2448 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 2449 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); 2450 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, 2451 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) 2452 return false; 2453 } 2454 return true; 2455 } 2456 2457 SDValue 2458 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2459 bool IsVarArg, 2460 const SmallVectorImpl<ISD::OutputArg> &Outs, 2461 const SmallVectorImpl<SDValue> &OutVals, 2462 const SDLoc &DL, SelectionDAG &DAG) const { 2463 const 
MachineFunction &MF = DAG.getMachineFunction(); 2464 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 2465 2466 // Stores the assignment of the return value to a location. 2467 SmallVector<CCValAssign, 16> RVLocs; 2468 2469 // Info about the registers and stack slot. 2470 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, 2471 *DAG.getContext()); 2472 2473 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, 2474 nullptr); 2475 2476 SDValue Glue; 2477 SmallVector<SDValue, 4> RetOps(1, Chain); 2478 2479 // Copy the result values into the output registers. 2480 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { 2481 SDValue Val = OutVals[i]; 2482 CCValAssign &VA = RVLocs[i]; 2483 assert(VA.isRegLoc() && "Can only return in registers!"); 2484 2485 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { 2486 // Handle returning f64 on RV32D with a soft float ABI. 2487 assert(VA.isRegLoc() && "Expected return via registers"); 2488 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL, 2489 DAG.getVTList(MVT::i32, MVT::i32), Val); 2490 SDValue Lo = SplitF64.getValue(0); 2491 SDValue Hi = SplitF64.getValue(1); 2492 Register RegLo = VA.getLocReg(); 2493 assert(RegLo < RISCV::X31 && "Invalid register pair"); 2494 Register RegHi = RegLo + 1; 2495 2496 if (STI.isRegisterReservedByUser(RegLo) || 2497 STI.isRegisterReservedByUser(RegHi)) 2498 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 2499 MF.getFunction(), 2500 "Return value register required, but has been reserved."}); 2501 2502 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); 2503 Glue = Chain.getValue(1); 2504 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); 2505 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue); 2506 Glue = Chain.getValue(1); 2507 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32)); 2508 } else { 2509 // Handle a 'normal' return. 2510 Val = convertValVTToLocVT(DAG, Val, VA, DL); 2511 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue); 2512 2513 if (STI.isRegisterReservedByUser(VA.getLocReg())) 2514 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 2515 MF.getFunction(), 2516 "Return value register required, but has been reserved."}); 2517 2518 // Guarantee that all emitted copies are stuck together. 2519 Glue = Chain.getValue(1); 2520 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2521 } 2522 } 2523 2524 RetOps[0] = Chain; // Update chain. 2525 2526 // Add the glue node if we have it. 2527 if (Glue.getNode()) { 2528 RetOps.push_back(Glue); 2529 } 2530 2531 // Interrupt service routines use different return instructions. 
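// ("user" selects uret, "supervisor" selects sret, and any other value
// accepted by LowerFormalArguments (i.e. "machine") selects mret in the code
// below.)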
2532 const Function &Func = DAG.getMachineFunction().getFunction(); 2533 if (Func.hasFnAttribute("interrupt")) { 2534 if (!Func.getReturnType()->isVoidTy()) 2535 report_fatal_error( 2536 "Functions with the interrupt attribute must have void return type!"); 2537 2538 MachineFunction &MF = DAG.getMachineFunction(); 2539 StringRef Kind = 2540 MF.getFunction().getFnAttribute("interrupt").getValueAsString(); 2541 2542 unsigned RetOpc; 2543 if (Kind == "user") 2544 RetOpc = RISCVISD::URET_FLAG; 2545 else if (Kind == "supervisor") 2546 RetOpc = RISCVISD::SRET_FLAG; 2547 else 2548 RetOpc = RISCVISD::MRET_FLAG; 2549 2550 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps); 2551 } 2552 2553 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); 2554 } 2555 2556 void RISCVTargetLowering::validateCCReservedRegs( 2557 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs, 2558 MachineFunction &MF) const { 2559 const Function &F = MF.getFunction(); 2560 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>(); 2561 2562 if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) { 2563 return STI.isRegisterReservedByUser(Reg.first); 2564 })) 2565 F.getContext().diagnose(DiagnosticInfoUnsupported{ 2566 F, "Argument register required, but has been reserved."}); 2567 } 2568 2569 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 2570 return CI->isTailCall(); 2571 } 2572 2573 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { 2574 switch ((RISCVISD::NodeType)Opcode) { 2575 case RISCVISD::FIRST_NUMBER: 2576 break; 2577 case RISCVISD::RET_FLAG: 2578 return "RISCVISD::RET_FLAG"; 2579 case RISCVISD::URET_FLAG: 2580 return "RISCVISD::URET_FLAG"; 2581 case RISCVISD::SRET_FLAG: 2582 return "RISCVISD::SRET_FLAG"; 2583 case RISCVISD::MRET_FLAG: 2584 return "RISCVISD::MRET_FLAG"; 2585 case RISCVISD::CALL: 2586 return "RISCVISD::CALL"; 2587 case RISCVISD::SELECT_CC: 2588 return "RISCVISD::SELECT_CC"; 2589 case RISCVISD::BuildPairF64: 2590 return "RISCVISD::BuildPairF64"; 2591 case RISCVISD::SplitF64: 2592 return "RISCVISD::SplitF64"; 2593 case RISCVISD::TAIL: 2594 return "RISCVISD::TAIL"; 2595 case RISCVISD::SLLW: 2596 return "RISCVISD::SLLW"; 2597 case RISCVISD::SRAW: 2598 return "RISCVISD::SRAW"; 2599 case RISCVISD::SRLW: 2600 return "RISCVISD::SRLW"; 2601 case RISCVISD::DIVW: 2602 return "RISCVISD::DIVW"; 2603 case RISCVISD::DIVUW: 2604 return "RISCVISD::DIVUW"; 2605 case RISCVISD::REMUW: 2606 return "RISCVISD::REMUW"; 2607 case RISCVISD::FMV_W_X_RV64: 2608 return "RISCVISD::FMV_W_X_RV64"; 2609 case RISCVISD::FMV_X_ANYEXTW_RV64: 2610 return "RISCVISD::FMV_X_ANYEXTW_RV64"; 2611 case RISCVISD::READ_CYCLE_WIDE: 2612 return "RISCVISD::READ_CYCLE_WIDE"; 2613 } 2614 return nullptr; 2615 } 2616 2617 /// getConstraintType - Given a constraint letter, return the type of 2618 /// constraint it is for this target. 
2619 RISCVTargetLowering::ConstraintType 2620 RISCVTargetLowering::getConstraintType(StringRef Constraint) const { 2621 if (Constraint.size() == 1) { 2622 switch (Constraint[0]) { 2623 default: 2624 break; 2625 case 'f': 2626 return C_RegisterClass; 2627 case 'I': 2628 case 'J': 2629 case 'K': 2630 return C_Immediate; 2631 case 'A': 2632 return C_Memory; 2633 } 2634 } 2635 return TargetLowering::getConstraintType(Constraint); 2636 } 2637 2638 std::pair<unsigned, const TargetRegisterClass *> 2639 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 2640 StringRef Constraint, 2641 MVT VT) const { 2642 // First, see if this is a constraint that directly corresponds to a 2643 // RISCV register class. 2644 if (Constraint.size() == 1) { 2645 switch (Constraint[0]) { 2646 case 'r': 2647 return std::make_pair(0U, &RISCV::GPRRegClass); 2648 case 'f': 2649 if (Subtarget.hasStdExtF() && VT == MVT::f32) 2650 return std::make_pair(0U, &RISCV::FPR32RegClass); 2651 if (Subtarget.hasStdExtD() && VT == MVT::f64) 2652 return std::make_pair(0U, &RISCV::FPR64RegClass); 2653 break; 2654 default: 2655 break; 2656 } 2657 } 2658 2659 // Clang will correctly decode the usage of register name aliases into their 2660 // official names. However, other frontends like `rustc` do not. This allows 2661 // users of these frontends to use the ABI names for registers in LLVM-style 2662 // register constraints. 2663 Register XRegFromAlias = StringSwitch<Register>(Constraint.lower()) 2664 .Case("{zero}", RISCV::X0) 2665 .Case("{ra}", RISCV::X1) 2666 .Case("{sp}", RISCV::X2) 2667 .Case("{gp}", RISCV::X3) 2668 .Case("{tp}", RISCV::X4) 2669 .Case("{t0}", RISCV::X5) 2670 .Case("{t1}", RISCV::X6) 2671 .Case("{t2}", RISCV::X7) 2672 .Cases("{s0}", "{fp}", RISCV::X8) 2673 .Case("{s1}", RISCV::X9) 2674 .Case("{a0}", RISCV::X10) 2675 .Case("{a1}", RISCV::X11) 2676 .Case("{a2}", RISCV::X12) 2677 .Case("{a3}", RISCV::X13) 2678 .Case("{a4}", RISCV::X14) 2679 .Case("{a5}", RISCV::X15) 2680 .Case("{a6}", RISCV::X16) 2681 .Case("{a7}", RISCV::X17) 2682 .Case("{s2}", RISCV::X18) 2683 .Case("{s3}", RISCV::X19) 2684 .Case("{s4}", RISCV::X20) 2685 .Case("{s5}", RISCV::X21) 2686 .Case("{s6}", RISCV::X22) 2687 .Case("{s7}", RISCV::X23) 2688 .Case("{s8}", RISCV::X24) 2689 .Case("{s9}", RISCV::X25) 2690 .Case("{s10}", RISCV::X26) 2691 .Case("{s11}", RISCV::X27) 2692 .Case("{t3}", RISCV::X28) 2693 .Case("{t4}", RISCV::X29) 2694 .Case("{t5}", RISCV::X30) 2695 .Case("{t6}", RISCV::X31) 2696 .Default(RISCV::NoRegister); 2697 if (XRegFromAlias != RISCV::NoRegister) 2698 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); 2699 2700 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the 2701 // TableGen record rather than the AsmName to choose registers for InlineAsm 2702 // constraints, plus we want to match those names to the widest floating point 2703 // register type available, manually select floating point registers here. 2704 // 2705 // The second case is the ABI name of the register, so that frontends can also 2706 // use the ABI names in register constraint lists. 
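// Illustrative only (the asm string and operands are invented for the
// example): with this handling, IR such as
//   call float asm "fadd.s $0, $1, $2", "={fa0},{f11},f"(float %a, float %b)
// works whether a frontend spells explicit registers with ABI names ("{fa0}")
// or architectural names ("{f11}"); both spellings are matched below.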
2707 if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) { 2708 std::pair<Register, Register> FReg = 2709 StringSwitch<std::pair<Register, Register>>(Constraint.lower()) 2710 .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D}) 2711 .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D}) 2712 .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D}) 2713 .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D}) 2714 .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D}) 2715 .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D}) 2716 .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D}) 2717 .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D}) 2718 .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D}) 2719 .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D}) 2720 .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D}) 2721 .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D}) 2722 .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D}) 2723 .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D}) 2724 .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D}) 2725 .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D}) 2726 .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D}) 2727 .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D}) 2728 .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D}) 2729 .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D}) 2730 .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D}) 2731 .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D}) 2732 .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D}) 2733 .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D}) 2734 .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D}) 2735 .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D}) 2736 .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D}) 2737 .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D}) 2738 .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D}) 2739 .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D}) 2740 .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D}) 2741 .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D}) 2742 .Default({RISCV::NoRegister, RISCV::NoRegister}); 2743 if (FReg.first != RISCV::NoRegister) 2744 return Subtarget.hasStdExtD() 2745 ? std::make_pair(FReg.second, &RISCV::FPR64RegClass) 2746 : std::make_pair(FReg.first, &RISCV::FPR32RegClass); 2747 } 2748 2749 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 2750 } 2751 2752 unsigned 2753 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { 2754 // Currently only support length 1 constraints. 2755 if (ConstraintCode.size() == 1) { 2756 switch (ConstraintCode[0]) { 2757 case 'A': 2758 return InlineAsm::Constraint_A; 2759 default: 2760 break; 2761 } 2762 } 2763 2764 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 2765 } 2766 2767 void RISCVTargetLowering::LowerAsmOperandForConstraint( 2768 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 2769 SelectionDAG &DAG) const { 2770 // Currently only support length 1 constraints. 2771 if (Constraint.length() == 1) { 2772 switch (Constraint[0]) { 2773 case 'I': 2774 // Validate & create a 12-bit signed immediate operand. 2775 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 2776 uint64_t CVal = C->getSExtValue(); 2777 if (isInt<12>(CVal)) 2778 Ops.push_back( 2779 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 2780 } 2781 return; 2782 case 'J': 2783 // Validate & create an integer zero operand. 
2784 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 2785 if (C->getZExtValue() == 0) 2786 Ops.push_back( 2787 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); 2788 return; 2789 case 'K': 2790 // Validate & create a 5-bit unsigned immediate operand. 2791 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 2792 uint64_t CVal = C->getZExtValue(); 2793 if (isUInt<5>(CVal)) 2794 Ops.push_back( 2795 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); 2796 } 2797 return; 2798 default: 2799 break; 2800 } 2801 } 2802 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 2803 } 2804 2805 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, 2806 Instruction *Inst, 2807 AtomicOrdering Ord) const { 2808 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) 2809 return Builder.CreateFence(Ord); 2810 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord)) 2811 return Builder.CreateFence(AtomicOrdering::Release); 2812 return nullptr; 2813 } 2814 2815 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, 2816 Instruction *Inst, 2817 AtomicOrdering Ord) const { 2818 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord)) 2819 return Builder.CreateFence(AtomicOrdering::Acquire); 2820 return nullptr; 2821 } 2822 2823 TargetLowering::AtomicExpansionKind 2824 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 2825 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating 2826 // point operations can't be used in an lr/sc sequence without breaking the 2827 // forward-progress guarantee. 2828 if (AI->isFloatingPointOperation()) 2829 return AtomicExpansionKind::CmpXChg; 2830 2831 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); 2832 if (Size == 8 || Size == 16) 2833 return AtomicExpansionKind::MaskedIntrinsic; 2834 return AtomicExpansionKind::None; 2835 } 2836 2837 static Intrinsic::ID 2838 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { 2839 if (XLen == 32) { 2840 switch (BinOp) { 2841 default: 2842 llvm_unreachable("Unexpected AtomicRMW BinOp"); 2843 case AtomicRMWInst::Xchg: 2844 return Intrinsic::riscv_masked_atomicrmw_xchg_i32; 2845 case AtomicRMWInst::Add: 2846 return Intrinsic::riscv_masked_atomicrmw_add_i32; 2847 case AtomicRMWInst::Sub: 2848 return Intrinsic::riscv_masked_atomicrmw_sub_i32; 2849 case AtomicRMWInst::Nand: 2850 return Intrinsic::riscv_masked_atomicrmw_nand_i32; 2851 case AtomicRMWInst::Max: 2852 return Intrinsic::riscv_masked_atomicrmw_max_i32; 2853 case AtomicRMWInst::Min: 2854 return Intrinsic::riscv_masked_atomicrmw_min_i32; 2855 case AtomicRMWInst::UMax: 2856 return Intrinsic::riscv_masked_atomicrmw_umax_i32; 2857 case AtomicRMWInst::UMin: 2858 return Intrinsic::riscv_masked_atomicrmw_umin_i32; 2859 } 2860 } 2861 2862 if (XLen == 64) { 2863 switch (BinOp) { 2864 default: 2865 llvm_unreachable("Unexpected AtomicRMW BinOp"); 2866 case AtomicRMWInst::Xchg: 2867 return Intrinsic::riscv_masked_atomicrmw_xchg_i64; 2868 case AtomicRMWInst::Add: 2869 return Intrinsic::riscv_masked_atomicrmw_add_i64; 2870 case AtomicRMWInst::Sub: 2871 return Intrinsic::riscv_masked_atomicrmw_sub_i64; 2872 case AtomicRMWInst::Nand: 2873 return Intrinsic::riscv_masked_atomicrmw_nand_i64; 2874 case AtomicRMWInst::Max: 2875 return Intrinsic::riscv_masked_atomicrmw_max_i64; 2876 case AtomicRMWInst::Min: 2877 return Intrinsic::riscv_masked_atomicrmw_min_i64; 2878 case AtomicRMWInst::UMax: 2879 return Intrinsic::riscv_masked_atomicrmw_umax_i64; 2880 
case AtomicRMWInst::UMin: 2881 return Intrinsic::riscv_masked_atomicrmw_umin_i64; 2882 } 2883 } 2884 2885 llvm_unreachable("Unexpected XLen\n"); 2886 } 2887 2888 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( 2889 IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, 2890 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { 2891 unsigned XLen = Subtarget.getXLen(); 2892 Value *Ordering = 2893 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); 2894 Type *Tys[] = {AlignedAddr->getType()}; 2895 Function *LrwOpScwLoop = Intrinsic::getDeclaration( 2896 AI->getModule(), 2897 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); 2898 2899 if (XLen == 64) { 2900 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); 2901 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2902 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); 2903 } 2904 2905 Value *Result; 2906 2907 // Must pass the shift amount needed to sign extend the loaded value prior 2908 // to performing a signed comparison for min/max. ShiftAmt is the number of 2909 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which 2910 // is the number of bits to left+right shift the value in order to 2911 // sign-extend. 2912 if (AI->getOperation() == AtomicRMWInst::Min || 2913 AI->getOperation() == AtomicRMWInst::Max) { 2914 const DataLayout &DL = AI->getModule()->getDataLayout(); 2915 unsigned ValWidth = 2916 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); 2917 Value *SextShamt = 2918 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); 2919 Result = Builder.CreateCall(LrwOpScwLoop, 2920 {AlignedAddr, Incr, Mask, SextShamt, Ordering}); 2921 } else { 2922 Result = 2923 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); 2924 } 2925 2926 if (XLen == 64) 2927 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2928 return Result; 2929 } 2930 2931 TargetLowering::AtomicExpansionKind 2932 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( 2933 AtomicCmpXchgInst *CI) const { 2934 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); 2935 if (Size == 8 || Size == 16) 2936 return AtomicExpansionKind::MaskedIntrinsic; 2937 return AtomicExpansionKind::None; 2938 } 2939 2940 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( 2941 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, 2942 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { 2943 unsigned XLen = Subtarget.getXLen(); 2944 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); 2945 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; 2946 if (XLen == 64) { 2947 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); 2948 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); 2949 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); 2950 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; 2951 } 2952 Type *Tys[] = {AlignedAddr->getType()}; 2953 Function *MaskedCmpXchg = 2954 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); 2955 Value *Result = Builder.CreateCall( 2956 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); 2957 if (XLen == 64) 2958 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); 2959 return Result; 2960 } 2961 2962 Register RISCVTargetLowering::getExceptionPointerRegister( 2963 const Constant *PersonalityFn) const { 2964 return RISCV::X10; 2965 } 2966 2967 Register 
RISCVTargetLowering::getExceptionSelectorRegister( 2968 const Constant *PersonalityFn) const { 2969 return RISCV::X11; 2970 } 2971 2972 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { 2973 // Return false to suppress the unnecessary extensions if the LibCall 2974 // arguments or return value is f32 type for LP64 ABI. 2975 RISCVABI::ABI ABI = Subtarget.getTargetABI(); 2976 if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) 2977 return false; 2978 2979 return true; 2980 } 2981 2982 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, 2983 SDValue C) const { 2984 // Check integral scalar types. 2985 if (VT.isScalarInteger()) { 2986 // Do not perform the transformation on riscv32 with the M extension. 2987 if (!Subtarget.is64Bit() && Subtarget.hasStdExtM()) 2988 return false; 2989 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { 2990 if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t)) 2991 return false; 2992 int64_t Imm = ConstNode->getSExtValue(); 2993 if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) || 2994 isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm)) 2995 return true; 2996 } 2997 } 2998 2999 return false; 3000 } 3001 3002 #define GET_REGISTER_MATCHER 3003 #include "RISCVGenAsmMatcher.inc" 3004 3005 Register 3006 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT, 3007 const MachineFunction &MF) const { 3008 Register Reg = MatchRegisterAltName(RegName); 3009 if (Reg == RISCV::NoRegister) 3010 Reg = MatchRegisterName(RegName); 3011 if (Reg == RISCV::NoRegister) 3012 report_fatal_error( 3013 Twine("Invalid register name \"" + StringRef(RegName) + "\".")); 3014 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); 3015 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg)) 3016 report_fatal_error(Twine("Trying to obtain non-reserved register \"" + 3017 StringRef(RegName) + "\".")); 3018 return Reg; 3019 } 3020