//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

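  // There is no native support for extending loads of i1, so promote them to
  // a wider memory type.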
  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT, XLenVT, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

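  // On RV64, i32 is not a legal type. Custom-legalise these operations so
  // they can later be selected as the 32-bit *W instructions (see
  // customLegalizeToWOp and customLegalizeToWOpWithSExt below).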
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  setOperationAction(ISD::ROTL, XLenVT, Expand);
  setOperationAction(ISD::ROTR, XLenVT, Expand);
  setOperationAction(ISD::BSWAP, XLenVT, Expand);
  setOperationAction(ISD::CTTZ, XLenVT, Expand);
  setOperationAction(ISD::CTLZ, XLenVT, Expand);
  setOperationAction(ISD::CTPOP, XLenVT, Expand);

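  // Condition codes with no single matching floating-point comparison
  // instruction; the F and D extensions provide only feq, flt and fle, so
  // these condition codes are expanded into combinations of those.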
  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE};

  ISD::NodeType FPOpToExtend[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExtend)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExtend)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit() &&
      !(Subtarget.hasStdExtD() || Subtarget.hasStdExtF())) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
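    // Without the A extension, all atomic operations must be expanded to
    // __atomic_* library calls.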
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  // Effectively disable jump table generation.
  setMinimumJumpTableEntries(INT_MAX);

  // Jumps are expensive compared to logic.
  setJumpIsExpensive();

  // We can use any register for comparisons.
  setHasMultipleConditionRegisters();
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
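    // Each masked atomic intrinsic reads and writes the naturally aligned
    // 32-bit word containing the target value, so model it as a 4-byte
    // volatile load/store.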
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
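// For example, (setcc lhs, rhs, setugt) is rewritten as (setcc rhs, lhs,
// setult), which maps directly onto the BLTU comparison.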
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
    assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
      return SDValue();
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
    return FPConv;
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
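  // Walk up the chain of saved frame pointers. This assumes the standard
  // RISC-V frame layout, where the parent's frame pointer is spilled
  // immediately below the saved return address, at FrameAddr - 2*XLenInBytes.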
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
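    // For a non-zero depth, load the return address saved in the parent
    // frame, which (under the same frame layout assumed in lowerFRAMEADDR)
    // lives at FrameAddr - XLenInBytes.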
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Hi = Lo << (Shamt-XLEN) // using the original Lo
  //   Lo = 0

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);
  switch (IntNo) {
  default:
    return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  }
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return RISCVISD::SLLW;
  case ISD::SRA:
    return RISCVISD::SRAW;
  case ISD::SRL:
    return RISCVISD::SRLW;
  case ISD::SDIV:
    return RISCVISD::DIVW;
  case ISD::UDIV:
    return RISCVISD::DIVUW;
  case ISD::UREM:
    return RISCVISD::REMUW;
  }
}

// Converts the given 32-bit operation to a target-specific SelectionDAG node.
// Because i32 isn't a legal type for RV64, these operations would otherwise
// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
// instructions later on, because the fact that the operation was originally
// of type i32 is lost.
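// For example, on RV64 (srl i32 %a, %b) becomes
// (trunc (RISCVISD::SRLW (anyext %a), (anyext %b))).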
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
  // ReplaceNodeResults requires we maintain the same type for the return value.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, reducing the number of sign-extension instructions required.
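// For example, (add i32 %a, %b) becomes
// (trunc (sext_inreg (add i64 (anyext %a), (anyext %b)), i32)).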
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::BITCAST: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtF() && "Unexpected custom legalisation");
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    if (Op0.getValueType() != MVT::f32)
      return;
    SDValue FPConv =
        DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    break;
  }
  }
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
        (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
      return SDValue();
    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      SDValue AExtOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
      return DCI.CombineTo(N, AExtOp);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG) {
      return DCI.CombineTo(N,
                           DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                                       DAG.getConstant(SignBit, DL, MVT::i64)));
    }
    assert(Op0.getOpcode() == ISD::FABS);
    return DCI.CombineTo(N,
                         DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                                     DAG.getConstant(~SignBit, DL, MVT::i64)));
  }
  }

  return SDValue();
}

bool RISCVTargetLowering::isDesirableToCommuteWithShift(
    const SDNode *N, CombineLevel Level) const {
  // The following folds are only desirable if `(OP _, c1 << c2)` can be
  // materialised in fewer instructions than `(OP _, c1)`:
  //
  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
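  //
  // For example, with c1 = 64 and c2 = 4, c1 << c2 = 1024 still fits in a
  // 12-bit signed immediate, so the fold is allowed. It is rejected when only
  // the unshifted c1 fits, or when c1 << c2 is costlier to materialise.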
  SDValue N0 = N->getOperand(0);
  EVT Ty = N0.getValueType();
  if (Ty.isScalarInteger() &&
      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (C1 && C2) {
      APInt C1Int = C1->getAPIntValue();
      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();

      // We can materialise `c1 << c2` into an add immediate, so it's "free",
      // and the combine should happen, to potentially allow further combines
      // later.
      if (ShiftedC1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
        return true;

      // We can materialise `c1` in an add immediate, so it's "free", and the
      // combine should be prevented.
      if (C1Int.getMinSignedBits() <= 64 &&
          isLegalAddImmediate(C1Int.getSExtValue()))
        return false;

      // Neither constant will fit into an immediate, so find materialisation
      // costs.
      int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
                                              Subtarget.is64Bit());
      int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
          ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());

      // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
      // combine should be prevented.
      if (C1Cost < ShiftedC1Cost)
        return false;
    }
  }
  return true;
}

unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
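    // Each of these nodes produces an i64 result that is sign-extended from
    // bit 31, so at least the top 33 bits are known to be equal.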
    return 33;
  }

  return 1;
}

static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
                                                  MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");

  // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // ...
  // read:
  // rdcycleh x3 # load high word of cycle
  // rdcycle  x2 # load low word of cycle
  // rdcycleh x4 # load high word of cycle
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ...

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(LoopMBB);

  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
      .addReg(RISCV::X0);

  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}

static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

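  // There is no instruction for moving an FPR64 directly to a pair of GPRs on
  // RV32, so split the double by spilling it to a stack slot (via
  // storeRegToStackSlot) and reloading the two halves with LW.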
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOLoad, 8, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
                                                 MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

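  // The converse of SplitF64Pseudo: no direct GPR-pair-to-FPR64 move exists
  // on RV32, so store the two halves with SW and reload the combined double
  // from the stack slot (via loadRegFromStackSlot).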
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();
  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                              MachineMemOperand::MOStore, 8, Align(8));
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMO);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMO);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
    return true;
  }
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern.  The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;

  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    else if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
    } else {
      if (SequenceMBBI->hasUnmodeledSideEffects() ||
          SequenceMBBI->mayLoadOrStore())
        break;
      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
          }))
        break;
    }
  }

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  unsigned Opcode = getBranchOpcodeForIntCondCode(CC);

  BuildMI(HeadMBB, DL, TII.get(Opcode))
    .addReg(LHS)
    .addReg(RHS)
    .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
1417           .addReg(SelectMBBI->getOperand(4).getReg())
1418           .addMBB(HeadMBB)
1419           .addReg(SelectMBBI->getOperand(5).getReg())
1420           .addMBB(IfFalseMBB);
1421       SelectMBBI->eraseFromParent();
1422     }
1423     SelectMBBI = Next;
1424   }
1425 
1426   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
1427   return TailMBB;
1428 }
1429 
1430 MachineBasicBlock *
1431 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1432                                                  MachineBasicBlock *BB) const {
1433   switch (MI.getOpcode()) {
1434   default:
1435     llvm_unreachable("Unexpected instr type to insert");
1436   case RISCV::ReadCycleWide:
1437     assert(!Subtarget.is64Bit() &&
1438            "ReadCycleWide is only to be used on riscv32");
1439     return emitReadCycleWidePseudo(MI, BB);
1440   case RISCV::Select_GPR_Using_CC_GPR:
1441   case RISCV::Select_FPR32_Using_CC_GPR:
1442   case RISCV::Select_FPR64_Using_CC_GPR:
1443     return emitSelectPseudo(MI, BB);
1444   case RISCV::BuildPairF64Pseudo:
1445     return emitBuildPairF64Pseudo(MI, BB);
1446   case RISCV::SplitF64Pseudo:
1447     return emitSplitF64Pseudo(MI, BB);
1448   }
1449 }
1450 
1451 // Calling Convention Implementation.
1452 // The expectations for frontend ABI lowering vary from target to target.
1453 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
1454 // details, but this is a longer term goal. For now, we simply try to keep the
1455 // role of the frontend as simple and well-defined as possible. The rules can
1456 // be summarised as:
1457 // * Never split up large scalar arguments. We handle them here.
1458 // * If a hardfloat calling convention is being used, and the struct may be
1459 // passed in a pair of registers (fp+fp, int+fp), and both registers are
1460 // available, then pass as two separate arguments. If either the GPRs or FPRs
1461 // are exhausted, then pass according to the rule below.
1462 // * If a struct could never be passed in registers or directly in a stack
1463 // slot (as it is larger than 2*XLEN and the floating point rules don't
1464 // apply), then pass it using a pointer with the byval attribute.
1465 // * If a struct is less than 2*XLEN, then coerce to either a two-element
1466 // word-sized array or a 2*XLEN scalar (depending on alignment).
1467 // * The frontend can determine whether a struct is returned by reference or
1468 // not based on its size and fields. If it will be returned by reference, the
1469 // frontend must modify the prototype so a pointer with the sret annotation is
1470 // passed as the first argument. This is not necessary for large scalar
1471 // returns.
1472 // * Struct return values and varargs should be coerced to structs containing
1473 // register-size fields in the same situations they would be for fixed
1474 // arguments.
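//
// For example (illustrative only, assuming an ilp32d target): a frontend
// would pass
//   struct S { double d; int i; };
// as two separate arguments (one in an FPR, one in a GPR) while registers of
// both kinds remain available, and would coerce
//   struct T { int a, b; };
// to a [2 x i32] array (or to a single i64 if the struct were 8-byte
// aligned).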
1475 
1476 static const MCPhysReg ArgGPRs[] = {
1477   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
1478   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
1479 };
1480 static const MCPhysReg ArgFPR32s[] = {
1481   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
1482   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
1483 };
1484 static const MCPhysReg ArgFPR64s[] = {
1485   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
1486   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
1487 };
1488 
1489 // Pass a 2*XLEN argument that has been split into two XLEN values through
1490 // registers or the stack as necessary.
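// For example (illustrative): an i64 argument on RV32 may end up in a GPR
// pair such as a0+a1, be split between a7 and a stack slot, or be passed
// entirely on the stack, depending on how many argument GPRs remain free.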
1491 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
1492                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
1493                                 MVT ValVT2, MVT LocVT2,
1494                                 ISD::ArgFlagsTy ArgFlags2) {
1495   unsigned XLenInBytes = XLen / 8;
1496   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1497     // At least one half can be passed via register.
1498     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
1499                                      VA1.getLocVT(), CCValAssign::Full));
1500   } else {
1501     // Both halves must be passed on the stack, with proper alignment.
1502     Align StackAlign =
1503         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
1504     State.addLoc(
1505         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
1506                             State.AllocateStack(XLenInBytes, StackAlign),
1507                             VA1.getLocVT(), CCValAssign::Full));
1508     State.addLoc(CCValAssign::getMem(
1509         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
1510         LocVT2, CCValAssign::Full));
1511     return false;
1512   }
1513 
1514   if (Register Reg = State.AllocateReg(ArgGPRs)) {
1515     // The second half can also be passed via register.
1516     State.addLoc(
1517         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
1518   } else {
1519     // The second half is passed via the stack, without additional alignment.
1520     State.addLoc(CCValAssign::getMem(
1521         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
1522         LocVT2, CCValAssign::Full));
1523   }
1524 
1525   return false;
1526 }
1527 
1528 // Implements the RISC-V calling convention. Returns true upon failure.
1529 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1530                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1531                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1532                      bool IsRet, Type *OrigTy) {
1533   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
1534   assert(XLen == 32 || XLen == 64);
1535   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
1536 
1537   // Any return value split into more than two values can't be returned
1538   // directly.
1539   if (IsRet && ValNo > 1)
1540     return true;
1541 
1542   // Set UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
1543   // variadic argument, or if no F32 argument registers are available.
1544   bool UseGPRForF32 = true;
1545   // Set UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if
1546   // passing a variadic argument, or if no F64 argument registers are available.
1547   bool UseGPRForF64 = true;
1548 
1549   switch (ABI) {
1550   default:
1551     llvm_unreachable("Unexpected ABI");
1552   case RISCVABI::ABI_ILP32:
1553   case RISCVABI::ABI_LP64:
1554     break;
1555   case RISCVABI::ABI_ILP32F:
1556   case RISCVABI::ABI_LP64F:
1557     UseGPRForF32 = !IsFixed;
1558     break;
1559   case RISCVABI::ABI_ILP32D:
1560   case RISCVABI::ABI_LP64D:
1561     UseGPRForF32 = !IsFixed;
1562     UseGPRForF64 = !IsFixed;
1563     break;
1564   }
1565 
1566   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
1567     UseGPRForF32 = true;
1568   if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
1569     UseGPRForF64 = true;
1570 
1571   // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
1572   // variables rather than directly checking against the target ABI.
1573 
1574   if (UseGPRForF32 && ValVT == MVT::f32) {
1575     LocVT = XLenVT;
1576     LocInfo = CCValAssign::BCvt;
1577   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
1578     LocVT = MVT::i64;
1579     LocInfo = CCValAssign::BCvt;
1580   }
1581 
1582   // If this is a variadic argument, the RISC-V calling convention requires
1583   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
1584   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
1585   // be used regardless of whether the original argument was split during
1586   // legalisation or not. The argument will not be passed by registers if the
1587   // original type is larger than 2*XLEN, so the register alignment rule does
1588   // not apply.
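  // For example (illustrative): on RV32, a variadic double whose next free
  // GPR would be a1 skips a1 and is instead passed in the aligned pair a2+a3.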
1589   unsigned TwoXLenInBytes = (2 * XLen) / 8;
1590   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
1591       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
1592     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
1593     // Skip 'odd' register if necessary.
1594     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
1595       State.AllocateReg(ArgGPRs);
1596   }
1597 
1598   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
1599   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
1600       State.getPendingArgFlags();
1601 
1602   assert(PendingLocs.size() == PendingArgFlags.size() &&
1603          "PendingLocs and PendingArgFlags out of sync");
1604 
1605   // Handle passing f64 on RV32D with a soft float ABI or when floating point
1606   // registers are exhausted.
1607   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
1608     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
1609            "Can't lower f64 if it is split");
1610     // Depending on available argument GPRs, f64 may be passed in a pair of
1611     // GPRs, split between a GPR and the stack, or passed completely on the
1612     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
1613     // cases.
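    // For example (illustrative): if only a7 remains free, the low half goes
    // in a7 and the high half in a 4-byte stack slot; if no GPR is free, the
    // whole f64 is placed in an 8-byte-aligned stack slot.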
1614     Register Reg = State.AllocateReg(ArgGPRs);
1615     LocVT = MVT::i32;
1616     if (!Reg) {
1617       unsigned StackOffset = State.AllocateStack(8, Align(8));
1618       State.addLoc(
1619           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1620       return false;
1621     }
1622     if (!State.AllocateReg(ArgGPRs))
1623       State.AllocateStack(4, Align(4));
1624     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1625     return false;
1626   }
1627 
1628   // Split arguments might be passed indirectly, so keep track of the pending
1629   // values.
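  // For example (illustrative): an i128 on RV32 is split into four i32 parts;
  // because more than two parts result, the value ends up being passed
  // indirectly through a single pointer (see the PendingLocs handling below).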
1630   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
1631     LocVT = XLenVT;
1632     LocInfo = CCValAssign::Indirect;
1633     PendingLocs.push_back(
1634         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
1635     PendingArgFlags.push_back(ArgFlags);
1636     if (!ArgFlags.isSplitEnd()) {
1637       return false;
1638     }
1639   }
1640 
1641   // If the split argument only had two elements, it should be passed directly
1642   // in registers or on the stack.
1643   if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
1644     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
1645     // Apply the normal calling convention rules to the first half of the
1646     // split argument.
1647     CCValAssign VA = PendingLocs[0];
1648     ISD::ArgFlagsTy AF = PendingArgFlags[0];
1649     PendingLocs.clear();
1650     PendingArgFlags.clear();
1651     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
1652                                ArgFlags);
1653   }
1654 
1655   // Allocate to a register if possible, or else a stack slot.
1656   Register Reg;
1657   if (ValVT == MVT::f32 && !UseGPRForF32)
1658     Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
1659   else if (ValVT == MVT::f64 && !UseGPRForF64)
1660     Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
1661   else
1662     Reg = State.AllocateReg(ArgGPRs);
1663   unsigned StackOffset =
1664       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
1665 
1666   // If we reach this point and PendingLocs is non-empty, we must be at the
1667   // end of a split argument that must be passed indirectly.
1668   if (!PendingLocs.empty()) {
1669     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
1670     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
1671 
1672     for (auto &It : PendingLocs) {
1673       if (Reg)
1674         It.convertToReg(Reg);
1675       else
1676         It.convertToMem(StackOffset);
1677       State.addLoc(It);
1678     }
1679     PendingLocs.clear();
1680     PendingArgFlags.clear();
1681     return false;
1682   }
1683 
1684   assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
1685          "Expected an XLenVT at this stage");
1686 
1687   if (Reg) {
1688     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1689     return false;
1690   }
1691 
1692   // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
1693   if (ValVT == MVT::f32 || ValVT == MVT::f64) {
1694     LocVT = ValVT;
1695     LocInfo = CCValAssign::Full;
1696   }
1697   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
1698   return false;
1699 }
1700 
1701 void RISCVTargetLowering::analyzeInputArgs(
1702     MachineFunction &MF, CCState &CCInfo,
1703     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
1704   unsigned NumArgs = Ins.size();
1705   FunctionType *FType = MF.getFunction().getFunctionType();
1706 
1707   for (unsigned i = 0; i != NumArgs; ++i) {
1708     MVT ArgVT = Ins[i].VT;
1709     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
1710 
1711     Type *ArgTy = nullptr;
1712     if (IsRet)
1713       ArgTy = FType->getReturnType();
1714     else if (Ins[i].isOrigArg())
1715       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
1716 
1717     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1718     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
1719                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
1720       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
1721                         << EVT(ArgVT).getEVTString() << '\n');
1722       llvm_unreachable(nullptr);
1723     }
1724   }
1725 }
1726 
1727 void RISCVTargetLowering::analyzeOutputArgs(
1728     MachineFunction &MF, CCState &CCInfo,
1729     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
1730     CallLoweringInfo *CLI) const {
1731   unsigned NumArgs = Outs.size();
1732 
1733   for (unsigned i = 0; i != NumArgs; i++) {
1734     MVT ArgVT = Outs[i].VT;
1735     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
1736     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
1737 
1738     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
1739     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
1740                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
1741       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
1742                         << EVT(ArgVT).getEVTString() << "\n");
1743       llvm_unreachable(nullptr);
1744     }
1745   }
1746 }
1747 
1748 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
1749 // values.
1750 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
1751                                    const CCValAssign &VA, const SDLoc &DL) {
1752   switch (VA.getLocInfo()) {
1753   default:
1754     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1755   case CCValAssign::Full:
1756     break;
1757   case CCValAssign::BCvt:
1758     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1759       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
1760       break;
1761     }
1762     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
1763     break;
1764   }
1765   return Val;
1766 }
1767 
1768 // The caller is responsible for loading the full value if the argument is
1769 // passed with CCValAssign::Indirect.
1770 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
1771                                 const CCValAssign &VA, const SDLoc &DL) {
1772   MachineFunction &MF = DAG.getMachineFunction();
1773   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1774   EVT LocVT = VA.getLocVT();
1775   SDValue Val;
1776   const TargetRegisterClass *RC;
1777 
1778   switch (LocVT.getSimpleVT().SimpleTy) {
1779   default:
1780     llvm_unreachable("Unexpected register type");
1781   case MVT::i32:
1782   case MVT::i64:
1783     RC = &RISCV::GPRRegClass;
1784     break;
1785   case MVT::f32:
1786     RC = &RISCV::FPR32RegClass;
1787     break;
1788   case MVT::f64:
1789     RC = &RISCV::FPR64RegClass;
1790     break;
1791   }
1792 
1793   Register VReg = RegInfo.createVirtualRegister(RC);
1794   RegInfo.addLiveIn(VA.getLocReg(), VReg);
1795   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1796 
1797   if (VA.getLocInfo() == CCValAssign::Indirect)
1798     return Val;
1799 
1800   return convertLocVTToValVT(DAG, Val, VA, DL);
1801 }
1802 
1803 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
1804                                    const CCValAssign &VA, const SDLoc &DL) {
1805   EVT LocVT = VA.getLocVT();
1806 
1807   switch (VA.getLocInfo()) {
1808   default:
1809     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1810   case CCValAssign::Full:
1811     break;
1812   case CCValAssign::BCvt:
1813     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
1814       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
1815       break;
1816     }
1817     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
1818     break;
1819   }
1820   return Val;
1821 }
1822 
1823 // The caller is responsible for loading the full value if the argument is
1824 // passed with CCValAssign::Indirect.
1825 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
1826                                 const CCValAssign &VA, const SDLoc &DL) {
1827   MachineFunction &MF = DAG.getMachineFunction();
1828   MachineFrameInfo &MFI = MF.getFrameInfo();
1829   EVT LocVT = VA.getLocVT();
1830   EVT ValVT = VA.getValVT();
1831   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
1832   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1833                                  VA.getLocMemOffset(), /*Immutable=*/true);
1834   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1835   SDValue Val;
1836 
1837   ISD::LoadExtType ExtType;
1838   switch (VA.getLocInfo()) {
1839   default:
1840     llvm_unreachable("Unexpected CCValAssign::LocInfo");
1841   case CCValAssign::Full:
1842   case CCValAssign::Indirect:
1843   case CCValAssign::BCvt:
1844     ExtType = ISD::NON_EXTLOAD;
1845     break;
1846   }
1847   Val = DAG.getExtLoad(
1848       ExtType, DL, LocVT, Chain, FIN,
1849       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
1850   return Val;
1851 }
1852 
1853 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
1854                                        const CCValAssign &VA, const SDLoc &DL) {
1855   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
1856          "Unexpected VA");
1857   MachineFunction &MF = DAG.getMachineFunction();
1858   MachineFrameInfo &MFI = MF.getFrameInfo();
1859   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1860 
1861   if (VA.isMemLoc()) {
1862     // f64 is passed on the stack.
1863     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
1864     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1865     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
1866                        MachinePointerInfo::getFixedStack(MF, FI));
1867   }
1868 
1869   assert(VA.isRegLoc() && "Expected register VA assignment");
1870 
1871   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1872   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
1873   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
1874   SDValue Hi;
1875   if (VA.getLocReg() == RISCV::X17) {
1876     // Second half of f64 is passed on the stack.
1877     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
1878     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
1879     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
1880                      MachinePointerInfo::getFixedStack(MF, FI));
1881   } else {
1882     // Second half of f64 is passed in another GPR.
1883     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
1884     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
1885     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
1886   }
1887   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1888 }
1889 
1890 // FastCC shows less than a 1% performance improvement for some particular
1891 // benchmarks, but in theory it may benefit other cases.
1892 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
1893                             CCValAssign::LocInfo LocInfo,
1894                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
1895 
1896   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
1897     // X5 and X6 might be used by the save/restore libcalls.
1898     static const MCPhysReg GPRList[] = {
1899         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
1900         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
1901         RISCV::X29, RISCV::X30, RISCV::X31};
1902     if (unsigned Reg = State.AllocateReg(GPRList)) {
1903       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1904       return false;
1905     }
1906   }
1907 
1908   if (LocVT == MVT::f32) {
1909     static const MCPhysReg FPR32List[] = {
1910         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
1911         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
1912         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
1913         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
1914     if (unsigned Reg = State.AllocateReg(FPR32List)) {
1915       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1916       return false;
1917     }
1918   }
1919 
1920   if (LocVT == MVT::f64) {
1921     static const MCPhysReg FPR64List[] = {
1922         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
1923         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
1924         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
1925         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
1926     if (unsigned Reg = State.AllocateReg(FPR64List)) {
1927       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
1928       return false;
1929     }
1930   }
1931 
1932   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
1933     unsigned Offset4 = State.AllocateStack(4, Align(4));
1934     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
1935     return false;
1936   }
1937 
1938   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
1939     unsigned Offset5 = State.AllocateStack(8, Align(8));
1940     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
1941     return false;
1942   }
1943 
1944   return true; // CC didn't match.
1945 }
1946 
1947 // Transform physical registers into virtual registers.
1948 SDValue RISCVTargetLowering::LowerFormalArguments(
1949     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1950     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1951     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1952 
1953   switch (CallConv) {
1954   default:
1955     report_fatal_error("Unsupported calling convention");
1956   case CallingConv::C:
1957   case CallingConv::Fast:
1958     break;
1959   }
1960 
1961   MachineFunction &MF = DAG.getMachineFunction();
1962 
1963   const Function &Func = MF.getFunction();
1964   if (Func.hasFnAttribute("interrupt")) {
1965     if (!Func.arg_empty())
1966       report_fatal_error(
1967         "Functions with the interrupt attribute cannot have arguments!");
1968 
1969     StringRef Kind =
1970       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
1971 
1972     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
1973       report_fatal_error(
1974         "Function interrupt attribute argument not supported!");
1975   }
1976 
1977   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1978   MVT XLenVT = Subtarget.getXLenVT();
1979   unsigned XLenInBytes = Subtarget.getXLen() / 8;
1980   // Used with varargs to accumulate store chains.
1981   std::vector<SDValue> OutChains;
1982 
1983   // Assign locations to all of the incoming arguments.
1984   SmallVector<CCValAssign, 16> ArgLocs;
1985   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1986 
1987   if (CallConv == CallingConv::Fast)
1988     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
1989   else
1990     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
1991 
1992   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1993     CCValAssign &VA = ArgLocs[i];
1994     SDValue ArgValue;
1995     // Passing f64 on RV32D with a soft float ABI must be handled as a special
1996     // case.
1997     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
1998       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
1999     else if (VA.isRegLoc())
2000       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
2001     else
2002       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
2003 
2004     if (VA.getLocInfo() == CCValAssign::Indirect) {
2005       // If the original argument was split and passed by reference (e.g. i128
2006       // on RV32), we need to load all parts of it here (using the same
2007       // address).
2008       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2009                                    MachinePointerInfo()));
2010       unsigned ArgIndex = Ins[i].OrigArgIndex;
2011       assert(Ins[i].PartOffset == 0);
2012       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
2013         CCValAssign &PartVA = ArgLocs[i + 1];
2014         unsigned PartOffset = Ins[i + 1].PartOffset;
2015         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2016                                       DAG.getIntPtrConstant(PartOffset, DL));
2017         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2018                                      MachinePointerInfo()));
2019         ++i;
2020       }
2021       continue;
2022     }
2023     InVals.push_back(ArgValue);
2024   }
2025 
2026   if (IsVarArg) {
2027     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
2028     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
2029     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
2030     MachineFrameInfo &MFI = MF.getFrameInfo();
2031     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2032     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2033 
2034     // Offset of the first variable argument from stack pointer, and size of
2035     // the vararg save area. For now, the varargs save area is either zero or
2036     // large enough to hold a0-a7.
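    // For example (illustrative): an RV32 vararg function with two named
    // integer arguments saves a2-a7, so VarArgsSaveSize is 24 and the save
    // area occupies offsets -24..-4 relative to the incoming stack pointer.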
2037     int VaArgOffset, VarArgsSaveSize;
2038 
2039     // If all registers are allocated, then all varargs must be passed on the
2040     // stack and we don't need to save any argregs.
2041     if (ArgRegs.size() == Idx) {
2042       VaArgOffset = CCInfo.getNextStackOffset();
2043       VarArgsSaveSize = 0;
2044     } else {
2045       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
2046       VaArgOffset = -VarArgsSaveSize;
2047     }
2048 
2049     // Record the frame index of the first variable argument,
2050     // which is needed when lowering VASTART.
2051     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2052     RVFI->setVarArgsFrameIndex(FI);
2053 
2054     // If saving an odd number of registers, create an extra stack slot to
2055     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
2056     // offsets to even-numbered registers remain 2*XLEN-aligned.
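    // For example (illustrative): with one named argument on RV32 (Idx == 1),
    // a1-a7 are saved (28 bytes) and a 4-byte pad is added below them, growing
    // the save area to 32 bytes so a2/a4/a6 stay at 8-byte-aligned offsets.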
2057     if (Idx % 2) {
2058       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
2059       VarArgsSaveSize += XLenInBytes;
2060     }
2061 
2062     // Copy the integer registers that may have been used for passing varargs
2063     // to the vararg save area.
2064     for (unsigned I = Idx; I < ArgRegs.size();
2065          ++I, VaArgOffset += XLenInBytes) {
2066       const Register Reg = RegInfo.createVirtualRegister(RC);
2067       RegInfo.addLiveIn(ArgRegs[I], Reg);
2068       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
2069       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
2070       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2071       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
2072                                    MachinePointerInfo::getFixedStack(MF, FI));
2073       cast<StoreSDNode>(Store.getNode())
2074           ->getMemOperand()
2075           ->setValue((Value *)nullptr);
2076       OutChains.push_back(Store);
2077     }
2078     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
2079   }
2080 
2081   // All stores are grouped in one node so that the sizes of Ins and InVals
2082   // match. This only happens for vararg functions.
2083   if (!OutChains.empty()) {
2084     OutChains.push_back(Chain);
2085     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
2086   }
2087 
2088   return Chain;
2089 }
2090 
2091 /// isEligibleForTailCallOptimization - Check whether the call is eligible
2092 /// for tail call optimization.
2093 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
2094 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2095     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
2096     const SmallVector<CCValAssign, 16> &ArgLocs) const {
2097 
2098   auto &Callee = CLI.Callee;
2099   auto CalleeCC = CLI.CallConv;
2100   auto &Outs = CLI.Outs;
2101   auto &Caller = MF.getFunction();
2102   auto CallerCC = Caller.getCallingConv();
2103 
2104   // Exception-handling functions need a special set of instructions to
2105   // indicate a return to the hardware. Tail-calling another function would
2106   // probably break this.
2107   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
2108   // should be expanded as new function attributes are introduced.
2109   if (Caller.hasFnAttribute("interrupt"))
2110     return false;
2111 
2112   // Do not tail call opt if the stack is used to pass parameters.
2113   if (CCInfo.getNextStackOffset() != 0)
2114     return false;
2115 
2116   // Do not tail call opt if any parameters need to be passed indirectly.
2117   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
2118   // passed indirectly: the address of the value is passed in a register or,
2119   // if no register is available, on the stack. Passing indirectly often
2120   // requires allocating stack space to hold the value itself, but that
2121   // space is not reflected in CCInfo.getNextStackOffset(), so the check
2122   // above is not enough; we must also check whether any CCValAssign in
2123   // ArgLocs is CCValAssign::Indirect.
2124   for (auto &VA : ArgLocs)
2125     if (VA.getLocInfo() == CCValAssign::Indirect)
2126       return false;
2127 
2128   // Do not tail call opt if either caller or callee uses struct return
2129   // semantics.
2130   auto IsCallerStructRet = Caller.hasStructRetAttr();
2131   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2132   if (IsCallerStructRet || IsCalleeStructRet)
2133     return false;
2134 
2135   // Externally-defined functions with weak linkage should not be
2136   // tail-called. The behaviour of branch instructions in this situation (as
2137   // used for tail calls) is implementation-defined, so we cannot rely on the
2138   // linker replacing the tail call with a return.
2139   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2140     const GlobalValue *GV = G->getGlobal();
2141     if (GV->hasExternalWeakLinkage())
2142       return false;
2143   }
2144 
2145   // The callee has to preserve all registers the caller needs to preserve.
2146   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
2147   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2148   if (CalleeCC != CallerCC) {
2149     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2150     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2151       return false;
2152   }
2153 
2154   // Byval parameters hand the function a pointer directly into the stack area
2155   // we want to reuse during a tail call. Working around this *is* possible
2156   // but less efficient and uglier in LowerCall.
2157   for (auto &Arg : Outs)
2158     if (Arg.Flags.isByVal())
2159       return false;
2160 
2161   return true;
2162 }
2163 
2164 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
2165 // and output parameter nodes.
2166 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
2167                                        SmallVectorImpl<SDValue> &InVals) const {
2168   SelectionDAG &DAG = CLI.DAG;
2169   SDLoc &DL = CLI.DL;
2170   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2171   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2172   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2173   SDValue Chain = CLI.Chain;
2174   SDValue Callee = CLI.Callee;
2175   bool &IsTailCall = CLI.IsTailCall;
2176   CallingConv::ID CallConv = CLI.CallConv;
2177   bool IsVarArg = CLI.IsVarArg;
2178   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2179   MVT XLenVT = Subtarget.getXLenVT();
2180 
2181   MachineFunction &MF = DAG.getMachineFunction();
2182 
2183   // Analyze the operands of the call, assigning locations to each operand.
2184   SmallVector<CCValAssign, 16> ArgLocs;
2185   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2186 
2187   if (CallConv == CallingConv::Fast)
2188     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
2189   else
2190     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
2191 
2192   // Check if it's really possible to do a tail call.
2193   if (IsTailCall)
2194     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
2195 
2196   if (IsTailCall)
2197     ++NumTailCalls;
2198   else if (CLI.CB && CLI.CB->isMustTailCall())
2199     report_fatal_error("failed to perform tail call elimination on a call "
2200                        "site marked musttail");
2201 
2202   // Get a count of how many bytes are to be pushed on the stack.
2203   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
2204 
2205   // Create local copies for byval args
2206   SmallVector<SDValue, 8> ByValArgs;
2207   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2208     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2209     if (!Flags.isByVal())
2210       continue;
2211 
2212     SDValue Arg = OutVals[i];
2213     unsigned Size = Flags.getByValSize();
2214     Align Alignment = Flags.getNonZeroByValAlign();
2215 
2216     int FI =
2217         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
2218     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2219     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
2220 
2221     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
2222                           /*IsVolatile=*/false,
2223                           /*AlwaysInline=*/false, IsTailCall,
2224                           MachinePointerInfo(), MachinePointerInfo());
2225     ByValArgs.push_back(FIPtr);
2226   }
2227 
2228   if (!IsTailCall)
2229     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
2230 
2231   // Copy argument values to their designated locations.
2232   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2233   SmallVector<SDValue, 8> MemOpChains;
2234   SDValue StackPtr;
2235   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
2236     CCValAssign &VA = ArgLocs[i];
2237     SDValue ArgValue = OutVals[i];
2238     ISD::ArgFlagsTy Flags = Outs[i].Flags;
2239 
2240     // Handle passing f64 on RV32D with a soft float ABI as a special case.
2241     bool IsF64OnRV32DSoftABI =
2242         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
2243     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
2244       SDValue SplitF64 = DAG.getNode(
2245           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
2246       SDValue Lo = SplitF64.getValue(0);
2247       SDValue Hi = SplitF64.getValue(1);
2248 
2249       Register RegLo = VA.getLocReg();
2250       RegsToPass.push_back(std::make_pair(RegLo, Lo));
2251 
2252       if (RegLo == RISCV::X17) {
2253         // Second half of f64 is passed on the stack.
2254         // Work out the address of the stack slot.
2255         if (!StackPtr.getNode())
2256           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
2257         // Emit the store.
2258         MemOpChains.push_back(
2259             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
2260       } else {
2261         // Second half of f64 is passed in another GPR.
2262         assert(RegLo < RISCV::X31 && "Invalid register pair");
2263         Register RegHigh = RegLo + 1;
2264         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
2265       }
2266       continue;
2267     }
2268 
2269     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
2270     // as any other MemLoc.
2271 
2272     // Promote the value if needed.
2273     // For now, only handle fully promoted and indirect arguments.
2274     if (VA.getLocInfo() == CCValAssign::Indirect) {
2275       // Store the argument in a stack slot and pass its address.
2276       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
2277       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2278       MemOpChains.push_back(
2279           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2280                        MachinePointerInfo::getFixedStack(MF, FI)));
2281       // If the original argument was split (e.g. i128), we need
2282       // to store all parts of it here (and pass just one address).
2283       unsigned ArgIndex = Outs[i].OrigArgIndex;
2284       assert(Outs[i].PartOffset == 0);
2285       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
2286         SDValue PartValue = OutVals[i + 1];
2287         unsigned PartOffset = Outs[i + 1].PartOffset;
2288         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2289                                       DAG.getIntPtrConstant(PartOffset, DL));
2290         MemOpChains.push_back(
2291             DAG.getStore(Chain, DL, PartValue, Address,
2292                          MachinePointerInfo::getFixedStack(MF, FI)));
2293         ++i;
2294       }
2295       ArgValue = SpillSlot;
2296     } else {
2297       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
2298     }
2299 
2300     // Use local copy if it is a byval arg.
2301     if (Flags.isByVal())
2302       ArgValue = ByValArgs[j++];
2303 
2304     if (VA.isRegLoc()) {
2305       // Queue up the argument copies and emit them at the end.
2306       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2307     } else {
2308       assert(VA.isMemLoc() && "Argument not register or memory");
2309       assert(!IsTailCall && "Tail call not allowed if stack is used "
2310                             "for passing parameters");
2311 
2312       // Work out the address of the stack slot.
2313       if (!StackPtr.getNode())
2314         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
2315       SDValue Address =
2316           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2317                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
2318 
2319       // Emit the store.
2320       MemOpChains.push_back(
2321           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2322     }
2323   }
2324 
2325   // Join the stores, which are independent of one another.
2326   if (!MemOpChains.empty())
2327     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2328 
2329   SDValue Glue;
2330 
2331   // Build a sequence of copy-to-reg nodes, chained and glued together.
2332   for (auto &Reg : RegsToPass) {
2333     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
2334     Glue = Chain.getValue(1);
2335   }
2336 
2337   // Validate that none of the argument registers have been marked as
2338   // reserved; if any have been, report an error. Do the same for the return
2339   // address if this is not a tail call.
2340   validateCCReservedRegs(RegsToPass, MF);
2341   if (!IsTailCall &&
2342       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
2343     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2344         MF.getFunction(),
2345         "Return address register required, but has been reserved."});
2346 
2347   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
2348   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
2349   // split it and then direct call can be matched by PseudoCALL.
2350   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
2351     const GlobalValue *GV = S->getGlobal();
2352 
2353     unsigned OpFlags = RISCVII::MO_CALL;
2354     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
2355       OpFlags = RISCVII::MO_PLT;
2356 
2357     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
2358   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2359     unsigned OpFlags = RISCVII::MO_CALL;
2360 
2361     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
2362                                                  nullptr))
2363       OpFlags = RISCVII::MO_PLT;
2364 
2365     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
2366   }
2367 
2368   // The first call operand is the chain and the second is the target address.
2369   SmallVector<SDValue, 8> Ops;
2370   Ops.push_back(Chain);
2371   Ops.push_back(Callee);
2372 
2373   // Add argument registers to the end of the list so that they are
2374   // known live into the call.
2375   for (auto &Reg : RegsToPass)
2376     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2377 
2378   if (!IsTailCall) {
2379     // Add a register mask operand representing the call-preserved registers.
2380     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2381     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2382     assert(Mask && "Missing call preserved mask for calling convention");
2383     Ops.push_back(DAG.getRegisterMask(Mask));
2384   }
2385 
2386   // Glue the call to the argument copies, if any.
2387   if (Glue.getNode())
2388     Ops.push_back(Glue);
2389 
2390   // Emit the call.
2391   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2392 
2393   if (IsTailCall) {
2394     MF.getFrameInfo().setHasTailCall();
2395     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
2396   }
2397 
2398   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
2399   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2400   Glue = Chain.getValue(1);
2401 
2402   // Mark the end of the call, which is glued to the call itself.
2403   Chain = DAG.getCALLSEQ_END(Chain,
2404                              DAG.getConstant(NumBytes, DL, PtrVT, true),
2405                              DAG.getConstant(0, DL, PtrVT, true),
2406                              Glue, DL);
2407   Glue = Chain.getValue(1);
2408 
2409   // Assign locations to each value returned by this call.
2410   SmallVector<CCValAssign, 16> RVLocs;
2411   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
2412   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
2413 
2414   // Copy all of the result registers out of their specified physreg.
2415   for (auto &VA : RVLocs) {
2416     // Copy the value out
2417     SDValue RetValue =
2418         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
2419     // Glue the RetValue to the end of the call sequence
2420     Chain = RetValue.getValue(1);
2421     Glue = RetValue.getValue(2);
2422 
2423     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2424       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
2425       SDValue RetValue2 =
2426           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
2427       Chain = RetValue2.getValue(1);
2428       Glue = RetValue2.getValue(2);
2429       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
2430                              RetValue2);
2431     }
2432 
2433     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
2434 
2435     InVals.push_back(RetValue);
2436   }
2437 
2438   return Chain;
2439 }
2440 
2441 bool RISCVTargetLowering::CanLowerReturn(
2442     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2443     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2444   SmallVector<CCValAssign, 16> RVLocs;
2445   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
2446   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
2447     MVT VT = Outs[i].VT;
2448     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2449     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
2450     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
2451                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
2452       return false;
2453   }
2454   return true;
2455 }
2456 
2457 SDValue
2458 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2459                                  bool IsVarArg,
2460                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2461                                  const SmallVectorImpl<SDValue> &OutVals,
2462                                  const SDLoc &DL, SelectionDAG &DAG) const {
2463   const MachineFunction &MF = DAG.getMachineFunction();
2464   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
2465 
2466   // Stores the assignment of the return value to a location.
2467   SmallVector<CCValAssign, 16> RVLocs;
2468 
2469   // Info about the registers and stack slot.
2470   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
2471                  *DAG.getContext());
2472 
2473   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
2474                     nullptr);
2475 
2476   SDValue Glue;
2477   SmallVector<SDValue, 4> RetOps(1, Chain);
2478 
2479   // Copy the result values into the output registers.
2480   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
2481     SDValue Val = OutVals[i];
2482     CCValAssign &VA = RVLocs[i];
2483     assert(VA.isRegLoc() && "Can only return in registers!");
2484 
2485     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
2486       // Handle returning f64 on RV32D with a soft float ABI.
2487       assert(VA.isRegLoc() && "Expected return via registers");
2488       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
2489                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
2490       SDValue Lo = SplitF64.getValue(0);
2491       SDValue Hi = SplitF64.getValue(1);
2492       Register RegLo = VA.getLocReg();
2493       assert(RegLo < RISCV::X31 && "Invalid register pair");
2494       Register RegHi = RegLo + 1;
2495 
2496       if (STI.isRegisterReservedByUser(RegLo) ||
2497           STI.isRegisterReservedByUser(RegHi))
2498         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2499             MF.getFunction(),
2500             "Return value register required, but has been reserved."});
2501 
2502       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
2503       Glue = Chain.getValue(1);
2504       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
2505       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
2506       Glue = Chain.getValue(1);
2507       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
2508     } else {
2509       // Handle a 'normal' return.
2510       Val = convertValVTToLocVT(DAG, Val, VA, DL);
2511       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
2512 
2513       if (STI.isRegisterReservedByUser(VA.getLocReg()))
2514         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2515             MF.getFunction(),
2516             "Return value register required, but has been reserved."});
2517 
2518       // Guarantee that all emitted copies are stuck together.
2519       Glue = Chain.getValue(1);
2520       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2521     }
2522   }
2523 
2524   RetOps[0] = Chain; // Update chain.
2525 
2526   // Add the glue node if we have it.
2527   if (Glue.getNode()) {
2528     RetOps.push_back(Glue);
2529   }
2530 
2531   // Interrupt service routines use different return instructions.
2532   const Function &Func = DAG.getMachineFunction().getFunction();
2533   if (Func.hasFnAttribute("interrupt")) {
2534     if (!Func.getReturnType()->isVoidTy())
2535       report_fatal_error(
2536           "Functions with the interrupt attribute must have void return type!");
2537 
2538     MachineFunction &MF = DAG.getMachineFunction();
2539     StringRef Kind =
2540       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
2541 
2542     unsigned RetOpc;
2543     if (Kind == "user")
2544       RetOpc = RISCVISD::URET_FLAG;
2545     else if (Kind == "supervisor")
2546       RetOpc = RISCVISD::SRET_FLAG;
2547     else
2548       RetOpc = RISCVISD::MRET_FLAG;
2549 
2550     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
2551   }
2552 
2553   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
2554 }
2555 
2556 void RISCVTargetLowering::validateCCReservedRegs(
2557     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
2558     MachineFunction &MF) const {
2559   const Function &F = MF.getFunction();
2560   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
2561 
2562   if (std::any_of(std::begin(Regs), std::end(Regs), [&STI](auto Reg) {
2563         return STI.isRegisterReservedByUser(Reg.first);
2564       }))
2565     F.getContext().diagnose(DiagnosticInfoUnsupported{
2566         F, "Argument register required, but has been reserved."});
2567 }
2568 
2569 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2570   return CI->isTailCall();
2571 }
2572 
2573 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
2574   switch ((RISCVISD::NodeType)Opcode) {
2575   case RISCVISD::FIRST_NUMBER:
2576     break;
2577   case RISCVISD::RET_FLAG:
2578     return "RISCVISD::RET_FLAG";
2579   case RISCVISD::URET_FLAG:
2580     return "RISCVISD::URET_FLAG";
2581   case RISCVISD::SRET_FLAG:
2582     return "RISCVISD::SRET_FLAG";
2583   case RISCVISD::MRET_FLAG:
2584     return "RISCVISD::MRET_FLAG";
2585   case RISCVISD::CALL:
2586     return "RISCVISD::CALL";
2587   case RISCVISD::SELECT_CC:
2588     return "RISCVISD::SELECT_CC";
2589   case RISCVISD::BuildPairF64:
2590     return "RISCVISD::BuildPairF64";
2591   case RISCVISD::SplitF64:
2592     return "RISCVISD::SplitF64";
2593   case RISCVISD::TAIL:
2594     return "RISCVISD::TAIL";
2595   case RISCVISD::SLLW:
2596     return "RISCVISD::SLLW";
2597   case RISCVISD::SRAW:
2598     return "RISCVISD::SRAW";
2599   case RISCVISD::SRLW:
2600     return "RISCVISD::SRLW";
2601   case RISCVISD::DIVW:
2602     return "RISCVISD::DIVW";
2603   case RISCVISD::DIVUW:
2604     return "RISCVISD::DIVUW";
2605   case RISCVISD::REMUW:
2606     return "RISCVISD::REMUW";
2607   case RISCVISD::FMV_W_X_RV64:
2608     return "RISCVISD::FMV_W_X_RV64";
2609   case RISCVISD::FMV_X_ANYEXTW_RV64:
2610     return "RISCVISD::FMV_X_ANYEXTW_RV64";
2611   case RISCVISD::READ_CYCLE_WIDE:
2612     return "RISCVISD::READ_CYCLE_WIDE";
2613   }
2614   return nullptr;
2615 }
2616 
2617 /// getConstraintType - Given a constraint letter, return the type of
2618 /// constraint it is for this target.
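/// For reference (per the usual RISC-V inline-asm constraint definitions):
/// 'f' names a floating-point register, 'I' a signed 12-bit immediate, 'J'
/// the integer constant zero, 'K' an unsigned 5-bit immediate, and 'A' an
/// address held in a general-purpose register.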
2619 RISCVTargetLowering::ConstraintType
2620 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
2621   if (Constraint.size() == 1) {
2622     switch (Constraint[0]) {
2623     default:
2624       break;
2625     case 'f':
2626       return C_RegisterClass;
2627     case 'I':
2628     case 'J':
2629     case 'K':
2630       return C_Immediate;
2631     case 'A':
2632       return C_Memory;
2633     }
2634   }
2635   return TargetLowering::getConstraintType(Constraint);
2636 }
2637 
2638 std::pair<unsigned, const TargetRegisterClass *>
2639 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2640                                                   StringRef Constraint,
2641                                                   MVT VT) const {
2642   // First, see if this is a constraint that directly corresponds to a
2643   // RISCV register class.
2644   if (Constraint.size() == 1) {
2645     switch (Constraint[0]) {
2646     case 'r':
2647       return std::make_pair(0U, &RISCV::GPRRegClass);
2648     case 'f':
2649       if (Subtarget.hasStdExtF() && VT == MVT::f32)
2650         return std::make_pair(0U, &RISCV::FPR32RegClass);
2651       if (Subtarget.hasStdExtD() && VT == MVT::f64)
2652         return std::make_pair(0U, &RISCV::FPR64RegClass);
2653       break;
2654     default:
2655       break;
2656     }
2657   }
2658 
2659   // Clang correctly decodes register name aliases into their official names,
2660   // but other frontends, such as `rustc`, do not. Accepting the aliases here
2661   // allows users of those frontends to use the ABI names for registers in
2662   // LLVM-style register constraints.
2663   Register XRegFromAlias = StringSwitch<Register>(Constraint.lower())
2664                                .Case("{zero}", RISCV::X0)
2665                                .Case("{ra}", RISCV::X1)
2666                                .Case("{sp}", RISCV::X2)
2667                                .Case("{gp}", RISCV::X3)
2668                                .Case("{tp}", RISCV::X4)
2669                                .Case("{t0}", RISCV::X5)
2670                                .Case("{t1}", RISCV::X6)
2671                                .Case("{t2}", RISCV::X7)
2672                                .Cases("{s0}", "{fp}", RISCV::X8)
2673                                .Case("{s1}", RISCV::X9)
2674                                .Case("{a0}", RISCV::X10)
2675                                .Case("{a1}", RISCV::X11)
2676                                .Case("{a2}", RISCV::X12)
2677                                .Case("{a3}", RISCV::X13)
2678                                .Case("{a4}", RISCV::X14)
2679                                .Case("{a5}", RISCV::X15)
2680                                .Case("{a6}", RISCV::X16)
2681                                .Case("{a7}", RISCV::X17)
2682                                .Case("{s2}", RISCV::X18)
2683                                .Case("{s3}", RISCV::X19)
2684                                .Case("{s4}", RISCV::X20)
2685                                .Case("{s5}", RISCV::X21)
2686                                .Case("{s6}", RISCV::X22)
2687                                .Case("{s7}", RISCV::X23)
2688                                .Case("{s8}", RISCV::X24)
2689                                .Case("{s9}", RISCV::X25)
2690                                .Case("{s10}", RISCV::X26)
2691                                .Case("{s11}", RISCV::X27)
2692                                .Case("{t3}", RISCV::X28)
2693                                .Case("{t4}", RISCV::X29)
2694                                .Case("{t5}", RISCV::X30)
2695                                .Case("{t6}", RISCV::X31)
2696                                .Default(RISCV::NoRegister);
2697   if (XRegFromAlias != RISCV::NoRegister)
2698     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
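
  // Illustrative example: with the mapping above, a frontend-emitted
  // constraint such as "{a0}" resolves to X10 in the GPR class even though
  // the underlying TableGen record is named X10 rather than A0.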
2699 
2700   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
2701   // record rather than the AsmName to choose registers for InlineAsm
2702   // constraints, and we want to match those names to the widest floating-point
2703   // register type available, so manually select floating-point registers here.
2704   //
2705   // The second name in each Cases() below is the ABI name of the register, so
2706   // that frontends can also use the ABI names in register constraint lists.
2707   if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
2708     std::pair<Register, Register> FReg =
2709         StringSwitch<std::pair<Register, Register>>(Constraint.lower())
2710             .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D})
2711             .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D})
2712             .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D})
2713             .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D})
2714             .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D})
2715             .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D})
2716             .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D})
2717             .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D})
2718             .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D})
2719             .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D})
2720             .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D})
2721             .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D})
2722             .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D})
2723             .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D})
2724             .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D})
2725             .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D})
2726             .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D})
2727             .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D})
2728             .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D})
2729             .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D})
2730             .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D})
2731             .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D})
2732             .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D})
2733             .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D})
2734             .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D})
2735             .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D})
2736             .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D})
2737             .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D})
2738             .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D})
2739             .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D})
2740             .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D})
2741             .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D})
2742             .Default({RISCV::NoRegister, RISCV::NoRegister});
2743     if (FReg.first != RISCV::NoRegister)
2744       return Subtarget.hasStdExtD()
2745                  ? std::make_pair(FReg.second, &RISCV::FPR64RegClass)
2746                  : std::make_pair(FReg.first, &RISCV::FPR32RegClass);
2747   }
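
  // Illustrative example: on an RV64IFD target, "{fa0}" (or "{f10}") resolves
  // to F10_D in FPR64, since each D-extension register subsumes the
  // corresponding F register; on RV32IF the same constraint resolves to
  // F10_F in FPR32.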
2748 
2749   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2750 }
2751 
2752 unsigned
2753 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
2754   // Currently, only single-letter constraints are supported.
2755   if (ConstraintCode.size() == 1) {
2756     switch (ConstraintCode[0]) {
2757     case 'A':
2758       return InlineAsm::Constraint_A;
2759     default:
2760       break;
2761     }
2762   }
2763 
2764   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
2765 }
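
// Illustrative only (hypothetical user code): the 'A' constraint supplies an
// address held in a general-purpose register, as required by the A-extension
// memory instructions, e.g.:
//
//   int old;
//   __asm__ volatile("amoswap.w %0, %2, %1"
//                    : "=r"(old), "+A"(*ptr)
//                    : "r"(newval));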
2766 
2767 void RISCVTargetLowering::LowerAsmOperandForConstraint(
2768     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2769     SelectionDAG &DAG) const {
2770   // Currently, only single-letter constraints are supported.
2771   if (Constraint.length() == 1) {
2772     switch (Constraint[0]) {
2773     case 'I':
2774       // Validate & create a 12-bit signed immediate operand.
2775       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
2776         uint64_t CVal = C->getSExtValue();
2777         if (isInt<12>(CVal))
2778           Ops.push_back(
2779               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
2780       }
2781       return;
2782     case 'J':
2783       // Validate & create an integer zero operand.
2784       if (auto *C = dyn_cast<ConstantSDNode>(Op))
2785         if (C->getZExtValue() == 0)
2786           Ops.push_back(
2787               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
2788       return;
2789     case 'K':
2790       // Validate & create a 5-bit unsigned immediate operand.
2791       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
2792         uint64_t CVal = C->getZExtValue();
2793         if (isUInt<5>(CVal))
2794           Ops.push_back(
2795               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
2796       }
2797       return;
2798     default:
2799       break;
2800     }
2801   }
2802   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2803 }
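
// Illustrative examples (hypothetical operand values) for the constraints
// handled above: "I"(2047) is accepted (the 12-bit signed range is
// [-2048, 2047]) while "I"(2048) is rejected, in which case no operand is
// pushed and generic handling reports an error; "J" admits only 0; and "K"
// admits [0, 31] (5-bit unsigned).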
2804 
2805 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
2806                                                    Instruction *Inst,
2807                                                    AtomicOrdering Ord) const {
2808   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
2809     return Builder.CreateFence(Ord);
2810   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
2811     return Builder.CreateFence(AtomicOrdering::Release);
2812   return nullptr;
2813 }
2814 
2815 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
2816                                                     Instruction *Inst,
2817                                                     AtomicOrdering Ord) const {
2818   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
2819     return Builder.CreateFence(AtomicOrdering::Acquire);
2820   return nullptr;
2821 }
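
// Illustrative summary, assuming the usual RISC-V lowering of IR fences
// (seq_cst -> fence rw,rw; acquire -> fence r,rw; release -> fence rw,w),
// of what the two hooks above produce:
//   load seq_cst  : fence rw,rw ; load  ; fence r,rw
//   load acquire  : load        ; fence r,rw
//   store release : fence rw,w  ; store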
2822 
2823 TargetLowering::AtomicExpansionKind
2824 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2825   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
2826   // point operations can't be used in an lr/sc sequence without breaking the
2827   // forward-progress guarantee.
2828   if (AI->isFloatingPointOperation())
2829     return AtomicExpansionKind::CmpXChg;
2830 
2831   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
2832   if (Size == 8 || Size == 16)
2833     return AtomicExpansionKind::MaskedIntrinsic;
2834   return AtomicExpansionKind::None;
2835 }
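
// Illustrative examples: an i32 "atomicrmw add" maps directly onto amoadd.w
// and needs no expansion; an i8 or i16 operation is expanded by
// AtomicExpandPass into a call to the matching masked intrinsic (selected
// below) operating on the containing aligned word; and "atomicrmw fadd"
// becomes a compare-exchange loop.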
2836 
2837 static Intrinsic::ID
2838 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
2839   if (XLen == 32) {
2840     switch (BinOp) {
2841     default:
2842       llvm_unreachable("Unexpected AtomicRMW BinOp");
2843     case AtomicRMWInst::Xchg:
2844       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
2845     case AtomicRMWInst::Add:
2846       return Intrinsic::riscv_masked_atomicrmw_add_i32;
2847     case AtomicRMWInst::Sub:
2848       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
2849     case AtomicRMWInst::Nand:
2850       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
2851     case AtomicRMWInst::Max:
2852       return Intrinsic::riscv_masked_atomicrmw_max_i32;
2853     case AtomicRMWInst::Min:
2854       return Intrinsic::riscv_masked_atomicrmw_min_i32;
2855     case AtomicRMWInst::UMax:
2856       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
2857     case AtomicRMWInst::UMin:
2858       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
2859     }
2860   }
2861 
2862   if (XLen == 64) {
2863     switch (BinOp) {
2864     default:
2865       llvm_unreachable("Unexpected AtomicRMW BinOp");
2866     case AtomicRMWInst::Xchg:
2867       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
2868     case AtomicRMWInst::Add:
2869       return Intrinsic::riscv_masked_atomicrmw_add_i64;
2870     case AtomicRMWInst::Sub:
2871       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
2872     case AtomicRMWInst::Nand:
2873       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
2874     case AtomicRMWInst::Max:
2875       return Intrinsic::riscv_masked_atomicrmw_max_i64;
2876     case AtomicRMWInst::Min:
2877       return Intrinsic::riscv_masked_atomicrmw_min_i64;
2878     case AtomicRMWInst::UMax:
2879       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
2880     case AtomicRMWInst::UMin:
2881       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
2882     }
2883   }
2884 
2885   llvm_unreachable("Unexpected XLen");
2886 }
2887 
2888 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
2889     IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
2890     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
2891   unsigned XLen = Subtarget.getXLen();
2892   Value *Ordering =
2893       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
2894   Type *Tys[] = {AlignedAddr->getType()};
2895   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
2896       AI->getModule(),
2897       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
2898 
2899   if (XLen == 64) {
2900     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
2901     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2902     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
2903   }
2904 
2905   Value *Result;
2906 
2907   // For min/max, we must pass the shift amount needed to sign-extend the
2908   // loaded value before the signed comparison. ShiftAmt is the number of bits
2909   // the value is shifted into position; we pass XLen - ValWidth - ShiftAmt,
2910   // the number of bits to shift the value left and then arithmetically right
2911   // in order to sign-extend it.
2912   if (AI->getOperation() == AtomicRMWInst::Min ||
2913       AI->getOperation() == AtomicRMWInst::Max) {
2914     const DataLayout &DL = AI->getModule()->getDataLayout();
2915     unsigned ValWidth =
2916         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
2917     Value *SextShamt =
2918         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
2919     Result = Builder.CreateCall(LrwOpScwLoop,
2920                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
2921   } else {
2922     Result =
2923         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
2924   }
2925 
2926   if (XLen == 64)
2927     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2928   return Result;
2929 }
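
// Worked example (illustrative): for an i8 atomicrmw min on RV32 whose byte
// sits at bit offset 16 of its aligned word, ShiftAmt == 16 and
// ValWidth == 8, so SextShamt == 32 - 8 - 16 == 8; shifting the word left by
// 8 and then arithmetically right by 8 sign-extends the byte in place before
// the signed comparison.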
2930 
2931 TargetLowering::AtomicExpansionKind
2932 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
2933     AtomicCmpXchgInst *CI) const {
2934   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
2935   if (Size == 8 || Size == 16)
2936     return AtomicExpansionKind::MaskedIntrinsic;
2937   return AtomicExpansionKind::None;
2938 }
2939 
2940 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
2941     IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2942     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2943   unsigned XLen = Subtarget.getXLen();
2944   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
2945   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
2946   if (XLen == 64) {
2947     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
2948     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
2949     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
2950     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
2951   }
2952   Type *Tys[] = {AlignedAddr->getType()};
2953   Function *MaskedCmpXchg =
2954       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
2955   Value *Result = Builder.CreateCall(
2956       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
2957   if (XLen == 64)
2958     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
2959   return Result;
2960 }
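
// Illustrative shape of the emitted call on RV64 (names are hypothetical):
//   %res = call i64 @llvm.riscv.masked.cmpxchg.i64(
//              i32* %aligned, i64 %cmp, i64 %new, i64 %mask, i64 %ord)
// where %ord is the AtomicOrdering encoded as an integer; the i64 result is
// then truncated back to i32.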
2961 
2962 Register RISCVTargetLowering::getExceptionPointerRegister(
2963     const Constant *PersonalityFn) const {
2964   return RISCV::X10;
2965 }
2966 
2967 Register RISCVTargetLowering::getExceptionSelectorRegister(
2968     const Constant *PersonalityFn) const {
2969   return RISCV::X11;
2970 }
2971 
2972 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
2973   // Return false to suppress unnecessary extensions when a libcall argument
2974   // or return value is of f32 type under the LP64 ABI.
2975   RISCVABI::ABI ABI = Subtarget.getTargetABI();
2976   if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
2977     return false;
2978 
2979   return true;
2980 }
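
// Illustrative example: on LP64, an f32 division lowered to the __divsf3
// libcall keeps its float argument and result types as f32 rather than
// promoting them to the 64-bit XLen type, avoiding redundant extension code.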
2981 
2982 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
2983                                                  SDValue C) const {
2984   // Check integral scalar types.
2985   if (VT.isScalarInteger()) {
2986     // Do not perform the transformation on riscv32 with the M extension.
2987     if (!Subtarget.is64Bit() && Subtarget.hasStdExtM())
2988       return false;
2989     if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
2990       if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t))
2991         return false;
2992       int64_t Imm = ConstNode->getSExtValue();
2993       if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) ||
2994           isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm))
2995         return true;
2996     }
2997   }
2998 
2999   return false;
3000 }
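
// Worked example (illustrative): when this hook returns true, DAGCombiner
// may rewrite the multiply, e.g. x * 9 (since 9 - 1 is a power of two) as
// (x << 3) + x, and x * 7 (since 7 + 1 is a power of two) as (x << 3) - x.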
3001 
3002 #define GET_REGISTER_MATCHER
3003 #include "RISCVGenAsmMatcher.inc"
3004 
3005 Register
3006 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
3007                                        const MachineFunction &MF) const {
3008   Register Reg = MatchRegisterAltName(RegName);
3009   if (Reg == RISCV::NoRegister)
3010     Reg = MatchRegisterName(RegName);
3011   if (Reg == RISCV::NoRegister)
3012     report_fatal_error(
3013         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
3014   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
3015   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
3016     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
3017                              StringRef(RegName) + "\"."));
3018   return Reg;
3019 }
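
// Illustrative example (hypothetical IR): this hook serves the named-register
// intrinsics, e.g.
//   %sp = call i64 @llvm.read_register.i64(metadata !0)   ; !0 = !{!"sp"}
// "sp" matches X2 via MatchRegisterAltName; requesting a register that is
// neither always-reserved nor reserved by the user (e.g. via -ffixed-xN) is
// a fatal error.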
3020