//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"

static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));

namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Src = N->getOperand(0);
      if (VT.isInteger())
        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
                              N->getOperand(0));
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
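      // As an illustrative sketch (assumed registers, not from the original
      // source), the lowered sequence on RV32 looks like:
      //   sw lo, 0(sp)           // store low 32 bits to the stack slot
      //   sw hi, 4(sp)           // store high 32 bits
      //   vlse64.v v8, (sp), x0  // stride-0 load splats the 64-bit value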
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create a temporary stack slot for each node being expanded.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld:    ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs.  Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
  }

  CurDAG->setRoot(Dummy.getValue());

  MadeChange |= doPeepholeMergeVVMFold();

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister.  This is required to work around
  // an optimization deficiency in MachineCSE.  This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                            RISCVMatInt::InstSeq &Seq) {
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
  for (const RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
    SDNode *Result = nullptr;
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return SrcReg;
}

static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);

  // Use a rematerializable pseudo instruction for short sequences if enabled.
  if (Seq.size() == 2 && UsePseudoMovImm)
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
        0);

  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
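  // Worked example (a sketch): for Imm = 0xAAAAAAAAAAAAAAAA, SeqLo
  // materializes 0xFFFFFFFFAAAAAAAA via LUI+ADDIW, and
  // ADD_UW(Lo, SLLI(Lo, 32)) adds the zero-extended low half to the shifted
  // copy, reconstructing the full 64-bit constant.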
  if (Seq.size() > 3) {
    unsigned ShiftAmt, AddOpc;
    RISCVMatInt::InstSeq SeqLo =
        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

      SDValue SLLI = SDValue(
          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
          0);
      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}

void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do.  All have passthru operands.  For our pseudos,
  // all loads have policy operands.
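  // As an illustration, for a masked strided load the operand list built by
  // this function ends up as {base, stride, v0, vl, sew, policy, chain, glue};
  // the exact set depends on the flags above.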
  if (IsLoad) {
    uint64_t Policy = RISCVII::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  if (Glue)
    Operands.push_back(Glue);
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
                                      bool IsMasked) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
  ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
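  // Illustrative encoding, assuming the standard vtype layout (vlmul in bits
  // 2:0, vsew in bits 5:3, vta in bit 6, vma in bit 7): SEW=32 and LMUL=1
  // with tail/mask agnostic encode as 0b11010000 (0xD0).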
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
    if (auto VLEN = Subtarget->getRealVLen())
      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
                                                 XLenVT, VLImm, VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(Node,
              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}

bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
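  // Hedged worked example: (and (shl x, 8), 0xFF00) would need to materialize
  // 0xFF00 in a register, but (shl (andi x, 0xFF), 8) folds the constant into
  // ANDI's immediate field.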
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR, AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR:  BinOpc = RISCV::ORI;  break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  SDNode *BinOp = CurDAG->getMachineNode(
      BinOpc, DL, VT, Shift.getOperand(0),
      CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}

bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  // Only supported with XTHeadBb at the moment.
  if (!Subtarget->hasVendorXTHeadBb())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
                             MVT VT) {
    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Msb, DL, VT),
                                  CurDAG->getTargetConstant(Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1), C2) with C1 <= C2
  //        -> (TH.EXT X, msb, lsb)
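  // A sketch with assumed values: on RV64, (sra (shl X, 48), 56) becomes
  // TH.EXT X, 15, 8 (msb = 64-48-1, lsb = 56-48), sign-extending bit field
  // [15:8] of X.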
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount cannot be less than the left-shift amount).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (TH.EXT X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    const unsigned Lsb = RightShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
  bool IsPre = AM == ISD::PRE_INC;
  bool IsPost = AM == ISD::POST_INC;
  int64_t Offset = C->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
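  // For example (illustrative): Offset = 48 encodes as imm5 = 12 with
  // imm2 = 2 (48 = 12 << 2), while Offset = 33 is rejected below because no
  // shift in [0, 3] yields a simm5 with a zero remainder.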
  unsigned Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {
      Ld->getBasePtr(),
      CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
      CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
                                       Ld->getValueType(1), MVT::Other, Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});

  ReplaceNode(Node, New);

  return true;
}

void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");

  SDLoc DL(Node);
  unsigned IntNo = Node->getConstantOperandVal(1);

  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
         "Unexpected sf_vc intrinsic");

  // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
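  // The log2lmul operand handled below is assumed to use the signed 3-bit
  // vtype encoding: 0..3 map to LMUL m1..m8, and 5..7 (i.e. -3..-1) map to
  // mf8..mf2.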
  unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
  SDValue SEWOp =
      CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
  SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
                                      Node->getOperand(4), Node->getOperand(5),
                                      Node->getOperand(8), SEWOp,
                                      Node->getOperand(0)};

  unsigned Opcode;
  auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
  switch (LMulSDNode->getSExtValue()) {
  case 5:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
                                                  : RISCV::PseudoVC_I_SE_MF8;
    break;
  case 6:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
                                                  : RISCV::PseudoVC_I_SE_MF4;
    break;
  case 7:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
                                                  : RISCV::PseudoVC_I_SE_MF2;
    break;
  case 0:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
                                                  : RISCV::PseudoVC_I_SE_M1;
    break;
  case 1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
                                                  : RISCV::PseudoVC_I_SE_M2;
    break;
  case 2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
                                                  : RISCV::PseudoVC_I_SE_M4;
    break;
  case 3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
                                                  : RISCV::PseudoVC_I_SE_M8;
    break;
  }

  ReplaceNode(Node, CurDAG->getMachineNode(
                        Opcode, DL, Node->getSimpleValueType(0), Operands));
}

static unsigned getSegInstNF(unsigned Intrinsic) {
#define INST_NF_CASE(NAME, NF)                                                 \
  case Intrinsic::riscv_##NAME##NF:                                            \
    return NF;
#define INST_NF_CASE_MASK(NAME, NF)                                            \
  case Intrinsic::riscv_##NAME##NF##_mask:                                     \
    return NF;
#define INST_NF_CASE_FF(NAME, NF)                                              \
  case Intrinsic::riscv_##NAME##NF##ff:                                        \
    return NF;
#define INST_NF_CASE_FF_MASK(NAME, NF)                                         \
  case Intrinsic::riscv_##NAME##NF##ff_mask:                                   \
    return NF;
#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME)                                \
  MACRO_NAME(NAME, 2)                                                          \
  MACRO_NAME(NAME, 3)                                                          \
  MACRO_NAME(NAME, 4)                                                          \
  MACRO_NAME(NAME, 5)                                                          \
  MACRO_NAME(NAME, 6)                                                          \
  MACRO_NAME(NAME, 7)                                                          \
  MACRO_NAME(NAME, 8)
#define INST_ALL_NF_CASE(NAME)                                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME)                                    \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
#define INST_ALL_NF_CASE_WITH_FF(NAME)                                         \
  INST_ALL_NF_CASE(NAME)                                                       \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME)                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
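  // As an illustration, INST_ALL_NF_CASE(vlsseg) expands to cases for
  // Intrinsic::riscv_vlsseg2 through riscv_vlsseg8 plus their _mask
  // variants, each returning its NF.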
  switch (Intrinsic) {
  default:
    llvm_unreachable("Unexpected segment load/store intrinsic");
    INST_ALL_NF_CASE_WITH_FF(vlseg)
    INST_ALL_NF_CASE(vlsseg)
    INST_ALL_NF_CASE(vloxseg)
    INST_ALL_NF_CASE(vluxseg)
    INST_ALL_NF_CASE(vsseg)
    INST_ALL_NF_CASE(vssseg)
    INST_ALL_NF_CASE(vsoxseg)
    INST_ALL_NF_CASE(vsuxseg)
  }
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();

  switch (Opcode) {
  case ISD::Constant: {
    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If only the lower 8 bits are used, try to convert this to a simm6 by
    // sign-extending bit 7. This is neutral without the C extension, and
    // allows C.LI to be used if C is present.
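    // For instance (a sketch): Imm = 0xF0 with only byte-wide users becomes
    // -16, which fits in a simm6 and can be materialized with C.LI.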
    if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
      Imm = SignExtend64<8>(Imm);
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
    return;
  }
  case ISD::ConstantFP: {
    const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();

    bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
    SDValue Imm;
    // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
    // create an integer immediate.
    if (APF.isPosZero() || NegZeroF64)
      Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
    else
      Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                      *Subtarget);

    bool HasZdinx = Subtarget->hasStdExtZdinx();
    bool Is64Bit = Subtarget->is64Bit();
    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      llvm_unreachable("Unexpected size");
    case MVT::bf16:
      assert(Subtarget->hasStdExtZfbfmin());
      Opc = RISCV::FMV_H_X;
      break;
    case MVT::f16:
      Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
      break;
    case MVT::f32:
      Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
      break;
    case MVT::f64:
      // For RV32, we can't move from a GPR, we need to convert instead. This
      // should only happen for +0.0 and -0.0.
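      // On RV32D, for example, -0.0 is built (sketch, assumed register) as:
      //   fcvt.d.w ft0, zero       // +0.0
      //   fsgnjn.d ft0, ft0, ft0   // negate to -0.0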
      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
      if (Is64Bit)
        Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
      else
        Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
      break;
    }

    SDNode *Res;
    if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
    } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
    } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
      Res = CurDAG->getMachineNode(
          Opc, DL, VT, Imm,
          CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
    else
      Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);

    // For f64 -0.0, we need to insert a fneg.d idiom.
    if (NegZeroF64) {
      Opc = RISCV::FSGNJN_D;
      if (HasZdinx)
        Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
      Res =
          CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
    }

    ReplaceNode(Node, Res);
    return;
  }
  case RISCVISD::BuildGPRPair:
  case RISCVISD::BuildPairF64: {
    if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
      break;

    assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
           "BuildPairF64 only handled here on rv32i_zdinx");

    SDValue Ops[] = {
        CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
        Node->getOperand(0),
        CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
        Node->getOperand(1),
        CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};

    SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
    ReplaceNode(Node, N);
    return;
  }
  case RISCVISD::SplitGPRPair:
  case RISCVISD::SplitF64: {
    if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
      assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
             "SplitF64 only handled here on rv32i_zdinx");

      if (!SDValue(Node, 0).use_empty()) {
        SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
                                                    Node->getValueType(0),
                                                    Node->getOperand(0));
        ReplaceUses(SDValue(Node, 0), Lo);
      }

      if (!SDValue(Node, 1).use_empty()) {
        SDValue Hi = CurDAG->getTargetExtractSubreg(
            RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
        ReplaceUses(SDValue(Node, 1), Hi);
      }

      CurDAG->RemoveDeadNode(Node);
      return;
    }

    assert(Opcode != RISCVISD::SplitGPRPair &&
           "SplitGPRPair should already be handled");

    if (!Subtarget->hasStdExtZfa())
      break;
    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
           "Unexpected subtarget");

    // With Zfa, lower to fmv.x.w and fmvh.x.d.
    if (!SDValue(Node, 0).use_empty()) {
      SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
    }
    if (!SDValue(Node, 1).use_empty()) {
      SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
    }

    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::SHL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (TrailingZeros > 0 && LeadingZeros == 32) {
        // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
        // where C2 has 32 leading zeros and C3 trailing zeros.
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
      if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
          XLen - LeadingZeros > 11 && LeadingZeros != 32) {
        // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
        // where C2 has C4 leading zeros and no trailing zeros.
        // This is profitable if the "and" was to be lowered to
        // (srli (slli X, C4), C4) and not (andi X, C2).
        // For "LeadingZeros == 32":
        // - with Zba it's just (slli.uw X, C)
        // - without Zba a tablegen pattern applies the very same
        //   transform as we would have done here
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(LeadingZeros, DL, VT));
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
            CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SRLI);
        return;
      }
    }
    break;
  }
  case ISD::SRL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Take into account that C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
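    // Hedged example: with XLen = 64, (srl (and X, 0xFFFF), 4) has C3 = 16
    // trailing ones, so (absent XTHeadBb) it becomes (srli (slli X, 48), 52)
    // and avoids materializing the 0xFFFF mask.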
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = llvm::countr_one(Mask);
    if (ShAmt >= TrailingOnes)
      break;
    // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
    if (TrailingOnes == 32) {
      SDNode *SRLI = CurDAG->getMachineNode(
          Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, SRLI);
      return;
    }

    // Only do the remaining transforms if the AND has one use.
    if (!N0.hasOneUse())
      break;

    // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
    if (HasBitTest && ShAmt + 1 == TrailingOnes) {
      SDNode *BEXTI = CurDAG->getMachineNode(
          Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, BEXTI);
      return;
    }

    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    if (Subtarget->hasVendorXTHeadBb()) {
      SDNode *THEXTU = CurDAG->getMachineNode(
          RISCV::TH_EXTU, DL, VT, N0->getOperand(0),
          CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT),
          CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, THEXTU);
      return;
    }

    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    if (trySignedBitfieldExtract(Node))
      return;

    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And      (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
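    // Worked example (assuming XLen = 64): (sra (sext_inreg X, i8), 2)
    // becomes (srai (slli X, 56), 58).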
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::OR:
  case ISD::XOR:
    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
                                          SDValue X, unsigned Msb,
                                          unsigned Lsb) {
      if (!Subtarget->hasVendorXTHeadBb())
        return false;

      SDNode *TH_EXTU = CurDAG->getMachineNode(
          RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
          CurDAG->getTargetConstant(Lsb, DL, VT));
      ReplaceNode(Node, TH_EXTU);
      return true;
    };

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (LeftShift || N0.getOpcode() == ISD::SRL) {
      auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C)
        break;
      unsigned C2 = C->getZExtValue();
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      // Keep track of whether this is a c.andi. If we can't use c.andi, the
      // shift pair might offer more compression opportunities.
      // TODO: We could check for C extension here, but we don't have many lit
      // tests with the C extension enabled so not checking gets better
      // coverage.
      // TODO: What if ANDI is faster than the shift?
      bool IsCANDI = isInt<6>(N1C->getSExtValue());

      uint64_t C1 = N1C->getZExtValue();

      // Clear irrelevant bits in the mask.
      if (LeftShift)
        C1 &= maskTrailingZeros<uint64_t>(C2);
      else
        C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Some transforms should only be done if the shift has a single use or
      // the AND would become (srli (slli X, 32), 32)
      bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

      SDValue X = N0.getOperand(0);

      // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
      // with c3 leading zeros.
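      // Illustrative case: with XLen = 64, C1 = 0x00FFFFFFFFFFFFFF (8 leading
      // zeros) and c2 = 4 gives (srli (slli x, 4), 8), extracting bits [59:4]
      // of x without materializing the mask.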
      if (!LeftShift && isMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 < Leading) {
          // If the number of leading zeros is C2+32 this can be SRLIW.
          if (C2 + 32 == Leading) {
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
          // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
          //
          // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
          // legalized and goes through DAG combine.
          if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
              X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
            SDNode *SRAIW =
                CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
                                       CurDAG->getTargetConstant(31, DL, VT));
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
                CurDAG->getTargetConstant(Leading - 32, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // Try to use an unsigned bitfield extract (e.g., th.extu) if
          // available.
          // Transform (and (srl x, C2), C1)
          //        -> (<bfextract> x, msb, lsb)
          //
          // Make sure to keep this below the SRLIW cases, as we always want to
          // prefer the more common instruction.
          const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
          const unsigned Lsb = C2;
          if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
            return;

          // (srli (slli x, c3-c2), c3).
          // Skip if we could use (zext.w (sraiw X, C2)).
          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
                      X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                      cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
          // Also Skip if we can use bexti or th.tst.
          Skip |= HasBitTest && Leading == XLen - 1;
          if (OneUseOrZExtW && !Skip) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(Leading - C2, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
      // shifted by c2 bits with c3 leading zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);

        if (C2 + Leading < XLen &&
            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
          // Use slli.uw when possible.
          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
            SDNode *SLLI_UW =
                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
                                       CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SLLI_UW);
            return;
          }

          // (srli (slli x, c2+c3), c3)
1320           if (OneUseOrZExtW && !IsCANDI) {
1321             SDNode *SLLI = CurDAG->getMachineNode(
1322                 RISCV::SLLI, DL, VT, X,
1323                 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1324             SDNode *SRLI = CurDAG->getMachineNode(
1325                 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1326                 CurDAG->getTargetConstant(Leading, DL, VT));
1327             ReplaceNode(Node, SRLI);
1328             return;
1329           }
1330         }
1331       }
1332 
1333       // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1334       // shifted mask with c2 leading zeros and c3 trailing zeros.
1335       if (!LeftShift && isShiftedMask_64(C1)) {
1336         unsigned Leading = XLen - llvm::bit_width(C1);
1337         unsigned Trailing = llvm::countr_zero(C1);
1338         if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1339             !IsCANDI) {
1340           unsigned SrliOpc = RISCV::SRLI;
1341           // If the input is zexti32 we should use SRLIW.
1342           if (X.getOpcode() == ISD::AND &&
1343               isa<ConstantSDNode>(X.getOperand(1)) &&
1344               X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1345             SrliOpc = RISCV::SRLIW;
1346             X = X.getOperand(0);
1347           }
1348           SDNode *SRLI = CurDAG->getMachineNode(
1349               SrliOpc, DL, VT, X,
1350               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1351           SDNode *SLLI = CurDAG->getMachineNode(
1352               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1353               CurDAG->getTargetConstant(Trailing, DL, VT));
1354           ReplaceNode(Node, SLLI);
1355           return;
1356         }
1357         // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1358         if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1359             OneUseOrZExtW && !IsCANDI) {
1360           SDNode *SRLIW = CurDAG->getMachineNode(
1361               RISCV::SRLIW, DL, VT, X,
1362               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1363           SDNode *SLLI = CurDAG->getMachineNode(
1364               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1365               CurDAG->getTargetConstant(Trailing, DL, VT));
1366           ReplaceNode(Node, SLLI);
1367           return;
1368         }
1369         // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1370         if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1371             OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1372           SDNode *SRLI = CurDAG->getMachineNode(
1373               RISCV::SRLI, DL, VT, X,
1374               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1375           SDNode *SLLI_UW = CurDAG->getMachineNode(
1376               RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1377               CurDAG->getTargetConstant(Trailing, DL, VT));
1378           ReplaceNode(Node, SLLI_UW);
1379           return;
1380         }
1381       }
1382 
1383       // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1384       // shifted mask with no leading zeros and c3 trailing zeros.
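           // Worked example, assuming XLen=64: for C2=4 and
           // C1=0xFFFFFFFFFFFFF000 (Leading=0, Trailing=12),
           //   ((x << 4) & C1) == (slli (srli x, 8), 12).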
1385       if (LeftShift && isShiftedMask_64(C1)) {
1386         unsigned Leading = XLen - llvm::bit_width(C1);
1387         unsigned Trailing = llvm::countr_zero(C1);
1388         if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1389           SDNode *SRLI = CurDAG->getMachineNode(
1390               RISCV::SRLI, DL, VT, X,
1391               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1392           SDNode *SLLI = CurDAG->getMachineNode(
1393               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1394               CurDAG->getTargetConstant(Trailing, DL, VT));
1395           ReplaceNode(Node, SLLI);
1396           return;
1397         }
1398         // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1399         if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1400           SDNode *SRLIW = CurDAG->getMachineNode(
1401               RISCV::SRLIW, DL, VT, X,
1402               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1403           SDNode *SLLI = CurDAG->getMachineNode(
1404               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1405               CurDAG->getTargetConstant(Trailing, DL, VT));
1406           ReplaceNode(Node, SLLI);
1407           return;
1408         }
1409 
1410         // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1411         if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1412             Subtarget->hasStdExtZba()) {
1413           SDNode *SRLI = CurDAG->getMachineNode(
1414               RISCV::SRLI, DL, VT, X,
1415               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1416           SDNode *SLLI_UW = CurDAG->getMachineNode(
1417               RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1418               CurDAG->getTargetConstant(Trailing, DL, VT));
1419           ReplaceNode(Node, SLLI_UW);
1420           return;
1421         }
1422       }
1423     }
1424 
1425     const uint64_t C1 = N1C->getZExtValue();
1426 
1427     if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1428         N0.hasOneUse()) {
1429       unsigned C2 = N0.getConstantOperandVal(1);
1430       unsigned XLen = Subtarget->getXLen();
1431       assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1432 
1433       SDValue X = N0.getOperand(0);
1434 
1435       // Prefer SRAIW + ANDI when possible.
1436       bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1437                   X.getOpcode() == ISD::SHL &&
1438                   isa<ConstantSDNode>(X.getOperand(1)) &&
1439                   X.getConstantOperandVal(1) == 32;
1440       // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1441       // mask with c3 leading zeros and c2 is larger than c3.
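           // Worked example, assuming XLen=64: for C2=48 and C1=0xFFFFFFFF
           // (Leading=32),
           //   (and (sra x, 48), 0xFFFFFFFF) == (srli (srai x, 16), 32).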
1442       if (isMask_64(C1) && !Skip) {
1443         unsigned Leading = XLen - llvm::bit_width(C1);
1444         if (C2 > Leading) {
1445           SDNode *SRAI = CurDAG->getMachineNode(
1446               RISCV::SRAI, DL, VT, X,
1447               CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1448           SDNode *SRLI = CurDAG->getMachineNode(
1449               RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1450               CurDAG->getTargetConstant(Leading, DL, VT));
1451           ReplaceNode(Node, SRLI);
1452           return;
1453         }
1454       }
1455 
1456       // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1457       // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1458       // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
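           // Worked example, assuming XLen=64: for C2=40 and
           // C1=0x000000FFFFFFFF00 (c3=24 leading zeros, c4=8 trailing zeros),
           //   (and (sra y, 40), C1) == (slli (srli (srai y, 16), 32), 8).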
1459       if (isShiftedMask_64(C1) && !Skip) {
1460         unsigned Leading = XLen - llvm::bit_width(C1);
1461         unsigned Trailing = llvm::countr_zero(C1);
1462         if (C2 > Leading && Leading > 0 && Trailing > 0) {
1463           SDNode *SRAI = CurDAG->getMachineNode(
1464               RISCV::SRAI, DL, VT, N0.getOperand(0),
1465               CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1466           SDNode *SRLI = CurDAG->getMachineNode(
1467               RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1468               CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1469           SDNode *SLLI = CurDAG->getMachineNode(
1470               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1471               CurDAG->getTargetConstant(Trailing, DL, VT));
1472           ReplaceNode(Node, SLLI);
1473           return;
1474         }
1475       }
1476     }
1477 
1478     // If C1 masks off the upper bits only (but can't be formed as an
1479     // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1480     // available.
1481     // Transform (and x, C1)
1482     //        -> (<bfextract> x, msb, lsb)
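         // Worked example: C1=0x3FFFF is an 18-bit mask that is not a simm12,
         // so it becomes an extract of bits [17:0], e.g. th.extu rd, rs, 17, 0
         // on XTHeadBb.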
1483     if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1484       const unsigned Msb = llvm::bit_width(C1) - 1;
1485       if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1486         return;
1487     }
1488 
1489     if (tryShrinkShlLogicImm(Node))
1490       return;
1491 
1492     break;
1493   }
1494   case ISD::MUL: {
1495     // Special case for calculating (mul (and X, C2), C1) where the full product
1496     // fits in XLen bits. We can shift X left by the number of leading zeros in
1497     // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1498     // product has XLen trailing zeros, putting it in the output of MULHU. This
1499     // can avoid materializing a constant in a register for C2.
1500 
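         // Worked example, assuming XLen=64: (mul (and X, 0x3FFFF), 5) gives
         // lzcnt(C2)=46, so it becomes (mulhu (slli X, 46), 5 << 18). The SLLI
         // also makes the AND redundant, and the full 128-bit product is
         // ((X & 0x3FFFF) * 5) << 64, so MULHU returns exactly that product.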
1501     // RHS should be a constant.
1502     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1503     if (!N1C || !N1C->hasOneUse())
1504       break;
1505 
1506     // LHS should be an AND with constant.
1507     SDValue N0 = Node->getOperand(0);
1508     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1509       break;
1510 
1511     uint64_t C2 = N0.getConstantOperandVal(1);
1512 
1513     // Constant should be a mask.
1514     if (!isMask_64(C2))
1515       break;
1516 
1517     // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1518     // multiple users or the constant is a simm12. This prevents us from
1519     // inserting a shift and still having uses of the AND/ZEXT. Shifting a
1520     // simm12 will likely make it more costly to materialize. Otherwise,
1521     // using a SLLI might allow it to be compressed.
1522     bool IsANDIOrZExt =
1523         isInt<12>(C2) ||
1524         (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1525     // With XTHeadBb, we can use TH.EXTU.
1526     IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1527     if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1528       break;
1529     // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1530     // the constant is a simm32.
1531     bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1532     // With XTHeadBb, we can use TH.EXTU.
1533     IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1534     if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1535       break;
1536 
1537     // We need to shift left the AND input and C1 by a total of XLen bits.
1538 
1539     // How far left do we need to shift the AND input?
1540     unsigned XLen = Subtarget->getXLen();
1541     unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1542 
1543     // The constant gets shifted by the remaining amount unless that would
1544     // shift bits out.
1545     uint64_t C1 = N1C->getZExtValue();
1546     unsigned ConstantShift = XLen - LeadingZeros;
1547     if (ConstantShift > (XLen - llvm::bit_width(C1)))
1548       break;
1549 
1550     uint64_t ShiftedC1 = C1 << ConstantShift;
1551     // If this is RV32, we need to sign extend the constant.
1552     if (XLen == 32)
1553       ShiftedC1 = SignExtend64<32>(ShiftedC1);
1554 
1555     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1556     SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1557     SDNode *SLLI =
1558         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1559                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1560     SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1561                                            SDValue(SLLI, 0), SDValue(Imm, 0));
1562     ReplaceNode(Node, MULHU);
1563     return;
1564   }
1565   case ISD::LOAD: {
1566     if (tryIndexedLoad(Node))
1567       return;
1568 
1569     if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1570       // We match a post-incrementing load here.
1571       LoadSDNode *Load = cast<LoadSDNode>(Node);
1572       if (Load->getAddressingMode() != ISD::POST_INC)
1573         break;
1574 
1575       SDValue Chain = Node->getOperand(0);
1576       SDValue Base = Node->getOperand(1);
1577       SDValue Offset = Node->getOperand(2);
1578 
1579       bool Simm12 = false;
1580       bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1581 
1582       if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1583         int ConstantVal = ConstantOffset->getSExtValue();
1584         Simm12 = isInt<12>(ConstantVal);
1585         if (Simm12)
1586           Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1587                                              Offset.getValueType());
1588       }
1589 
1590       unsigned Opcode = 0;
1591       switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1592       case MVT::i8:
1593         if (Simm12 && SignExtend)
1594           Opcode = RISCV::CV_LB_ri_inc;
1595         else if (Simm12 && !SignExtend)
1596           Opcode = RISCV::CV_LBU_ri_inc;
1597         else if (!Simm12 && SignExtend)
1598           Opcode = RISCV::CV_LB_rr_inc;
1599         else
1600           Opcode = RISCV::CV_LBU_rr_inc;
1601         break;
1602       case MVT::i16:
1603         if (Simm12 && SignExtend)
1604           Opcode = RISCV::CV_LH_ri_inc;
1605         else if (Simm12 && !SignExtend)
1606           Opcode = RISCV::CV_LHU_ri_inc;
1607         else if (!Simm12 && SignExtend)
1608           Opcode = RISCV::CV_LH_rr_inc;
1609         else
1610           Opcode = RISCV::CV_LHU_rr_inc;
1611         break;
1612       case MVT::i32:
1613         if (Simm12)
1614           Opcode = RISCV::CV_LW_ri_inc;
1615         else
1616           Opcode = RISCV::CV_LW_rr_inc;
1617         break;
1618       default:
1619         break;
1620       }
1621       if (!Opcode)
1622         break;
1623 
1624       ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1625                                                Chain.getSimpleValueType(), Base,
1626                                                Offset, Chain));
1627       return;
1628     }
1629     break;
1630   }
1631   case ISD::INTRINSIC_WO_CHAIN: {
1632     unsigned IntNo = Node->getConstantOperandVal(0);
1633     switch (IntNo) {
1634       // By default we do not custom select any intrinsic.
1635     default:
1636       break;
1637     case Intrinsic::riscv_vmsgeu:
1638     case Intrinsic::riscv_vmsge: {
1639       SDValue Src1 = Node->getOperand(1);
1640       SDValue Src2 = Node->getOperand(2);
1641       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1642       bool IsCmpConstant = false;
1643       bool IsCmpMinimum = false;
1644       // Only custom select scalar second operand.
1645       if (Src2.getValueType() != XLenVT)
1646         break;
1647       // Small constants are handled with patterns.
1648       int64_t CVal = 0;
1649       MVT Src1VT = Src1.getSimpleValueType();
1650       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1651         IsCmpConstant = true;
1652         CVal = C->getSExtValue();
1653         if (CVal >= -15 && CVal <= 16) {
1654           if (!IsUnsigned || CVal != 0)
1655             break;
1656           IsCmpMinimum = true;
1657         } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1658                                               Src1VT.getScalarSizeInBits())
1659                                               .getSExtValue()) {
1660           IsCmpMinimum = true;
1661         }
1662       }
1663       unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1664       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1665       default:
1666         llvm_unreachable("Unexpected LMUL!");
1667 #define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
1668   case RISCVII::VLMUL::lmulenum:                                               \
1669     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1670                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1671     VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix                 \
1672                              : RISCV::PseudoVMSGT_VX_##suffix;                 \
1673     break;
1674         CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1675         CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1676         CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1677         CASE_VMSLT_OPCODES(LMUL_1, M1)
1678         CASE_VMSLT_OPCODES(LMUL_2, M2)
1679         CASE_VMSLT_OPCODES(LMUL_4, M4)
1680         CASE_VMSLT_OPCODES(LMUL_8, M8)
1681 #undef CASE_VMSLT_OPCODES
1682       }
1683       // Mask operations use the LMUL from the mask type.
1684       switch (RISCVTargetLowering::getLMUL(VT)) {
1685       default:
1686         llvm_unreachable("Unexpected LMUL!");
1687 #define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)                            \
1688   case RISCVII::VLMUL::lmulenum:                                               \
1689     VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
1690     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix;                               \
1691     break;
1692         CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1693         CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1694         CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1695         CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1696         CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1697         CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1698         CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1699 #undef CASE_VMNAND_VMSET_OPCODES
1700       }
1701       SDValue SEW = CurDAG->getTargetConstant(
1702           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1703       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1704       SDValue VL;
1705       selectVLOp(Node->getOperand(3), VL);
1706 
1707       // If this is vmsge(u) against the minimum value, expand it to vmset.
1708       if (IsCmpMinimum) {
1709         ReplaceNode(Node,
1710                     CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1711         return;
1712       }
1713 
1714       if (IsCmpConstant) {
1715         SDValue Imm =
1716             selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1717 
1718         ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1719                                                  {Src1, Imm, VL, SEW}));
1720         return;
1721       }
1722 
1723       // Expand to
1724       // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
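           // e.g. vmsge.vx vd, va, a0 becomes:
           //   vmslt.vx vd, va, a0
           //   vmnand.mm vd, vd, vd
           // since vmsge is the complement of vmslt.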
1725       SDValue Cmp = SDValue(
1726           CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1727           0);
1728       ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1729                                                {Cmp, Cmp, VL, MaskSEW}));
1730       return;
1731     }
1732     case Intrinsic::riscv_vmsgeu_mask:
1733     case Intrinsic::riscv_vmsge_mask: {
1734       SDValue Src1 = Node->getOperand(2);
1735       SDValue Src2 = Node->getOperand(3);
1736       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1737       bool IsCmpConstant = false;
1738       bool IsCmpMinimum = false;
1739       // Only custom select scalar second operand.
1740       if (Src2.getValueType() != XLenVT)
1741         break;
1742       // Small constants are handled with patterns.
1743       MVT Src1VT = Src1.getSimpleValueType();
1744       int64_t CVal = 0;
1745       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1746         IsCmpConstant = true;
1747         CVal = C->getSExtValue();
1748         if (CVal >= -15 && CVal <= 16) {
1749           if (!IsUnsigned || CVal != 0)
1750             break;
1751           IsCmpMinimum = true;
1752         } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1753                                               Src1VT.getScalarSizeInBits())
1754                                               .getSExtValue()) {
1755           IsCmpMinimum = true;
1756         }
1757       }
1758       unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1759           VMOROpcode, VMSGTMaskOpcode;
1760       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1761       default:
1762         llvm_unreachable("Unexpected LMUL!");
1763 #define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
1764   case RISCVII::VLMUL::lmulenum:                                               \
1765     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1766                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1767     VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1768                                  : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1769     VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK      \
1770                                  : RISCV::PseudoVMSGT_VX_##suffix##_MASK;      \
1771     break;
1772         CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1773         CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1774         CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1775         CASE_VMSLT_OPCODES(LMUL_1, M1)
1776         CASE_VMSLT_OPCODES(LMUL_2, M2)
1777         CASE_VMSLT_OPCODES(LMUL_4, M4)
1778         CASE_VMSLT_OPCODES(LMUL_8, M8)
1779 #undef CASE_VMSLT_OPCODES
1780       }
1781       // Mask operations use the LMUL from the mask type.
1782       switch (RISCVTargetLowering::getLMUL(VT)) {
1783       default:
1784         llvm_unreachable("Unexpected LMUL!");
1785 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
1786   case RISCVII::VLMUL::lmulenum:                                               \
1787     VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
1788     VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
1789     VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
1790     break;
1791         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
1792         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
1793         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
1794         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
1795         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
1796         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
1797         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
1798 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1799       }
1800       SDValue SEW = CurDAG->getTargetConstant(
1801           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1802       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1803       SDValue VL;
1804       selectVLOp(Node->getOperand(5), VL);
1805       SDValue MaskedOff = Node->getOperand(1);
1806       SDValue Mask = Node->getOperand(4);
1807 
1808       // For vmsge(u) against the minimum value, expand to vmor mask, maskedoff.
1809       if (IsCmpMinimum) {
1810         // We don't need vmor if the MaskedOff and the Mask are the same
1811         // value.
1812         if (Mask == MaskedOff) {
1813           ReplaceUses(Node, Mask.getNode());
1814           return;
1815         }
1816         ReplaceNode(Node,
1817                     CurDAG->getMachineNode(VMOROpcode, DL, VT,
1818                                            {Mask, MaskedOff, VL, MaskSEW}));
1819         return;
1820       }
1821 
1822       // If the MaskedOff value and the Mask are the same value use
1823       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
1824       // This avoids needing to copy v0 to vd before starting the next sequence.
1825       if (Mask == MaskedOff) {
1826         SDValue Cmp = SDValue(
1827             CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1828             0);
1829         ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1830                                                  {Mask, Cmp, VL, MaskSEW}));
1831         return;
1832       }
1833 
1834       // Mask needs to be copied to V0.
1835       SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1836                                            RISCV::V0, Mask, SDValue());
1837       SDValue Glue = Chain.getValue(1);
1838       SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1839 
1840       if (IsCmpConstant) {
1841         SDValue Imm =
1842             selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1843 
1844         ReplaceNode(Node, CurDAG->getMachineNode(
1845                               VMSGTMaskOpcode, DL, VT,
1846                               {MaskedOff, Src1, Imm, V0, VL, SEW, Glue}));
1847         return;
1848       }
1849 
1850       // Otherwise use
1851       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1852       // The result is mask undisturbed.
1853       // We use the same instructions to emulate mask agnostic behavior, because
1854       // the agnostic result can be either undisturbed or all 1.
1855       SDValue Cmp = SDValue(
1856           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1857                                  {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1858           0);
1859       // vmxor.mm vd, vd, v0 is used to update the active lanes.
1860       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1861                                                {Cmp, Mask, VL, MaskSEW}));
1862       return;
1863     }
1864     case Intrinsic::riscv_vsetvli:
1865     case Intrinsic::riscv_vsetvlimax:
1866       return selectVSETVLI(Node);
1867     }
1868     break;
1869   }
1870   case ISD::INTRINSIC_W_CHAIN: {
1871     unsigned IntNo = Node->getConstantOperandVal(1);
1872     switch (IntNo) {
1873       // By default we do not custom select any intrinsic.
1874     default:
1875       break;
1876     case Intrinsic::riscv_vlseg2:
1877     case Intrinsic::riscv_vlseg3:
1878     case Intrinsic::riscv_vlseg4:
1879     case Intrinsic::riscv_vlseg5:
1880     case Intrinsic::riscv_vlseg6:
1881     case Intrinsic::riscv_vlseg7:
1882     case Intrinsic::riscv_vlseg8: {
1883       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1884                   /*IsStrided*/ false);
1885       return;
1886     }
1887     case Intrinsic::riscv_vlseg2_mask:
1888     case Intrinsic::riscv_vlseg3_mask:
1889     case Intrinsic::riscv_vlseg4_mask:
1890     case Intrinsic::riscv_vlseg5_mask:
1891     case Intrinsic::riscv_vlseg6_mask:
1892     case Intrinsic::riscv_vlseg7_mask:
1893     case Intrinsic::riscv_vlseg8_mask: {
1894       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1895                   /*IsStrided*/ false);
1896       return;
1897     }
1898     case Intrinsic::riscv_vlsseg2:
1899     case Intrinsic::riscv_vlsseg3:
1900     case Intrinsic::riscv_vlsseg4:
1901     case Intrinsic::riscv_vlsseg5:
1902     case Intrinsic::riscv_vlsseg6:
1903     case Intrinsic::riscv_vlsseg7:
1904     case Intrinsic::riscv_vlsseg8: {
1905       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1906                   /*IsStrided*/ true);
1907       return;
1908     }
1909     case Intrinsic::riscv_vlsseg2_mask:
1910     case Intrinsic::riscv_vlsseg3_mask:
1911     case Intrinsic::riscv_vlsseg4_mask:
1912     case Intrinsic::riscv_vlsseg5_mask:
1913     case Intrinsic::riscv_vlsseg6_mask:
1914     case Intrinsic::riscv_vlsseg7_mask:
1915     case Intrinsic::riscv_vlsseg8_mask: {
1916       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1917                   /*IsStrided*/ true);
1918       return;
1919     }
1920     case Intrinsic::riscv_vloxseg2:
1921     case Intrinsic::riscv_vloxseg3:
1922     case Intrinsic::riscv_vloxseg4:
1923     case Intrinsic::riscv_vloxseg5:
1924     case Intrinsic::riscv_vloxseg6:
1925     case Intrinsic::riscv_vloxseg7:
1926     case Intrinsic::riscv_vloxseg8:
1927       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1928                    /*IsOrdered*/ true);
1929       return;
1930     case Intrinsic::riscv_vluxseg2:
1931     case Intrinsic::riscv_vluxseg3:
1932     case Intrinsic::riscv_vluxseg4:
1933     case Intrinsic::riscv_vluxseg5:
1934     case Intrinsic::riscv_vluxseg6:
1935     case Intrinsic::riscv_vluxseg7:
1936     case Intrinsic::riscv_vluxseg8:
1937       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
1938                    /*IsOrdered*/ false);
1939       return;
1940     case Intrinsic::riscv_vloxseg2_mask:
1941     case Intrinsic::riscv_vloxseg3_mask:
1942     case Intrinsic::riscv_vloxseg4_mask:
1943     case Intrinsic::riscv_vloxseg5_mask:
1944     case Intrinsic::riscv_vloxseg6_mask:
1945     case Intrinsic::riscv_vloxseg7_mask:
1946     case Intrinsic::riscv_vloxseg8_mask:
1947       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1948                    /*IsOrdered*/ true);
1949       return;
1950     case Intrinsic::riscv_vluxseg2_mask:
1951     case Intrinsic::riscv_vluxseg3_mask:
1952     case Intrinsic::riscv_vluxseg4_mask:
1953     case Intrinsic::riscv_vluxseg5_mask:
1954     case Intrinsic::riscv_vluxseg6_mask:
1955     case Intrinsic::riscv_vluxseg7_mask:
1956     case Intrinsic::riscv_vluxseg8_mask:
1957       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
1958                    /*IsOrdered*/ false);
1959       return;
1960     case Intrinsic::riscv_vlseg8ff:
1961     case Intrinsic::riscv_vlseg7ff:
1962     case Intrinsic::riscv_vlseg6ff:
1963     case Intrinsic::riscv_vlseg5ff:
1964     case Intrinsic::riscv_vlseg4ff:
1965     case Intrinsic::riscv_vlseg3ff:
1966     case Intrinsic::riscv_vlseg2ff: {
1967       selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
1968       return;
1969     }
1970     case Intrinsic::riscv_vlseg8ff_mask:
1971     case Intrinsic::riscv_vlseg7ff_mask:
1972     case Intrinsic::riscv_vlseg6ff_mask:
1973     case Intrinsic::riscv_vlseg5ff_mask:
1974     case Intrinsic::riscv_vlseg4ff_mask:
1975     case Intrinsic::riscv_vlseg3ff_mask:
1976     case Intrinsic::riscv_vlseg2ff_mask: {
1977       selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
1978       return;
1979     }
1980     case Intrinsic::riscv_vloxei:
1981     case Intrinsic::riscv_vloxei_mask:
1982     case Intrinsic::riscv_vluxei:
1983     case Intrinsic::riscv_vluxei_mask: {
1984       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1985                       IntNo == Intrinsic::riscv_vluxei_mask;
1986       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1987                        IntNo == Intrinsic::riscv_vloxei_mask;
1988 
1989       MVT VT = Node->getSimpleValueType(0);
1990       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1991 
1992       unsigned CurOp = 2;
1993       SmallVector<SDValue, 8> Operands;
1994       Operands.push_back(Node->getOperand(CurOp++));
1995 
1996       MVT IndexVT;
1997       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1998                                  /*IsStridedOrIndexed*/ true, Operands,
1999                                  /*IsLoad=*/true, &IndexVT);
2000 
2001       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2002              "Element count mismatch");
2003 
2004       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2005       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2006       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2007       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2008         report_fatal_error("The V extension does not support EEW=64 for index "
2009                            "values when XLEN=32");
2010       }
2011       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2012           IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2013           static_cast<unsigned>(IndexLMUL));
2014       MachineSDNode *Load =
2015           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2016 
2017       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2018         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2019 
2020       ReplaceNode(Node, Load);
2021       return;
2022     }
2023     case Intrinsic::riscv_vlm:
2024     case Intrinsic::riscv_vle:
2025     case Intrinsic::riscv_vle_mask:
2026     case Intrinsic::riscv_vlse:
2027     case Intrinsic::riscv_vlse_mask: {
2028       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2029                       IntNo == Intrinsic::riscv_vlse_mask;
2030       bool IsStrided =
2031           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2032 
2033       MVT VT = Node->getSimpleValueType(0);
2034       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2035 
2036       // The riscv_vlm intrinsic is always tail agnostic and has no
2037       // passthru operand at the IR level. Its pseudo has both a policy
2038       // and a passthru operand. The passthru operand is needed to track
2039       // the "tail undefined" state, and the policy is there just for
2040       // consistency - it will always be "don't care" for the unmasked
2041       // form.
2042       bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2043       unsigned CurOp = 2;
2044       SmallVector<SDValue, 8> Operands;
2045       if (HasPassthruOperand)
2046         Operands.push_back(Node->getOperand(CurOp++));
2047       else {
2048         // We eagerly lower to implicit_def (instead of undef), as we
2049         // otherwise fail to select nodes such as: nxv1i1 = undef
2050         SDNode *Passthru =
2051           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2052         Operands.push_back(SDValue(Passthru, 0));
2053       }
2054       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2055                                  Operands, /*IsLoad=*/true);
2056 
2057       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2058       const RISCV::VLEPseudo *P =
2059           RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2060                               static_cast<unsigned>(LMUL));
2061       MachineSDNode *Load =
2062           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2063 
2064       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2065         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2066 
2067       ReplaceNode(Node, Load);
2068       return;
2069     }
2070     case Intrinsic::riscv_vleff:
2071     case Intrinsic::riscv_vleff_mask: {
2072       bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2073 
2074       MVT VT = Node->getSimpleValueType(0);
2075       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2076 
2077       unsigned CurOp = 2;
2078       SmallVector<SDValue, 7> Operands;
2079       Operands.push_back(Node->getOperand(CurOp++));
2080       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2081                                  /*IsStridedOrIndexed*/ false, Operands,
2082                                  /*IsLoad=*/true);
2083 
2084       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2085       const RISCV::VLEPseudo *P =
2086           RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2087                               Log2SEW, static_cast<unsigned>(LMUL));
2088       MachineSDNode *Load = CurDAG->getMachineNode(
2089           P->Pseudo, DL, Node->getVTList(), Operands);
2090       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2091         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2092 
2093       ReplaceNode(Node, Load);
2094       return;
2095     }
2096     }
2097     break;
2098   }
2099   case ISD::INTRINSIC_VOID: {
2100     unsigned IntNo = Node->getConstantOperandVal(1);
2101     switch (IntNo) {
2102     case Intrinsic::riscv_vsseg2:
2103     case Intrinsic::riscv_vsseg3:
2104     case Intrinsic::riscv_vsseg4:
2105     case Intrinsic::riscv_vsseg5:
2106     case Intrinsic::riscv_vsseg6:
2107     case Intrinsic::riscv_vsseg7:
2108     case Intrinsic::riscv_vsseg8: {
2109       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2110                   /*IsStrided*/ false);
2111       return;
2112     }
2113     case Intrinsic::riscv_vsseg2_mask:
2114     case Intrinsic::riscv_vsseg3_mask:
2115     case Intrinsic::riscv_vsseg4_mask:
2116     case Intrinsic::riscv_vsseg5_mask:
2117     case Intrinsic::riscv_vsseg6_mask:
2118     case Intrinsic::riscv_vsseg7_mask:
2119     case Intrinsic::riscv_vsseg8_mask: {
2120       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2121                   /*IsStrided*/ false);
2122       return;
2123     }
2124     case Intrinsic::riscv_vssseg2:
2125     case Intrinsic::riscv_vssseg3:
2126     case Intrinsic::riscv_vssseg4:
2127     case Intrinsic::riscv_vssseg5:
2128     case Intrinsic::riscv_vssseg6:
2129     case Intrinsic::riscv_vssseg7:
2130     case Intrinsic::riscv_vssseg8: {
2131       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2132                   /*IsStrided*/ true);
2133       return;
2134     }
2135     case Intrinsic::riscv_vssseg2_mask:
2136     case Intrinsic::riscv_vssseg3_mask:
2137     case Intrinsic::riscv_vssseg4_mask:
2138     case Intrinsic::riscv_vssseg5_mask:
2139     case Intrinsic::riscv_vssseg6_mask:
2140     case Intrinsic::riscv_vssseg7_mask:
2141     case Intrinsic::riscv_vssseg8_mask: {
2142       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2143                   /*IsStrided*/ true);
2144       return;
2145     }
2146     case Intrinsic::riscv_vsoxseg2:
2147     case Intrinsic::riscv_vsoxseg3:
2148     case Intrinsic::riscv_vsoxseg4:
2149     case Intrinsic::riscv_vsoxseg5:
2150     case Intrinsic::riscv_vsoxseg6:
2151     case Intrinsic::riscv_vsoxseg7:
2152     case Intrinsic::riscv_vsoxseg8:
2153       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2154                    /*IsOrdered*/ true);
2155       return;
2156     case Intrinsic::riscv_vsuxseg2:
2157     case Intrinsic::riscv_vsuxseg3:
2158     case Intrinsic::riscv_vsuxseg4:
2159     case Intrinsic::riscv_vsuxseg5:
2160     case Intrinsic::riscv_vsuxseg6:
2161     case Intrinsic::riscv_vsuxseg7:
2162     case Intrinsic::riscv_vsuxseg8:
2163       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2164                    /*IsOrdered*/ false);
2165       return;
2166     case Intrinsic::riscv_vsoxseg2_mask:
2167     case Intrinsic::riscv_vsoxseg3_mask:
2168     case Intrinsic::riscv_vsoxseg4_mask:
2169     case Intrinsic::riscv_vsoxseg5_mask:
2170     case Intrinsic::riscv_vsoxseg6_mask:
2171     case Intrinsic::riscv_vsoxseg7_mask:
2172     case Intrinsic::riscv_vsoxseg8_mask:
2173       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2174                    /*IsOrdered*/ true);
2175       return;
2176     case Intrinsic::riscv_vsuxseg2_mask:
2177     case Intrinsic::riscv_vsuxseg3_mask:
2178     case Intrinsic::riscv_vsuxseg4_mask:
2179     case Intrinsic::riscv_vsuxseg5_mask:
2180     case Intrinsic::riscv_vsuxseg6_mask:
2181     case Intrinsic::riscv_vsuxseg7_mask:
2182     case Intrinsic::riscv_vsuxseg8_mask:
2183       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2184                    /*IsOrdered*/ false);
2185       return;
2186     case Intrinsic::riscv_vsoxei:
2187     case Intrinsic::riscv_vsoxei_mask:
2188     case Intrinsic::riscv_vsuxei:
2189     case Intrinsic::riscv_vsuxei_mask: {
2190       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2191                       IntNo == Intrinsic::riscv_vsuxei_mask;
2192       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2193                        IntNo == Intrinsic::riscv_vsoxei_mask;
2194 
2195       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2196       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2197 
2198       unsigned CurOp = 2;
2199       SmallVector<SDValue, 8> Operands;
2200       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2201 
2202       MVT IndexVT;
2203       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2204                                  /*IsStridedOrIndexed*/ true, Operands,
2205                                  /*IsLoad=*/false, &IndexVT);
2206 
2207       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2208              "Element count mismatch");
2209 
2210       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2211       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2212       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2213       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2214         report_fatal_error("The V extension does not support EEW=64 for index "
2215                            "values when XLEN=32");
2216       }
2217       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2218           IsMasked, IsOrdered, IndexLog2EEW,
2219           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2220       MachineSDNode *Store =
2221           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2222 
2223       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2224         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2225 
2226       ReplaceNode(Node, Store);
2227       return;
2228     }
2229     case Intrinsic::riscv_vsm:
2230     case Intrinsic::riscv_vse:
2231     case Intrinsic::riscv_vse_mask:
2232     case Intrinsic::riscv_vsse:
2233     case Intrinsic::riscv_vsse_mask: {
2234       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2235                       IntNo == Intrinsic::riscv_vsse_mask;
2236       bool IsStrided =
2237           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2238 
2239       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2240       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2241 
2242       unsigned CurOp = 2;
2243       SmallVector<SDValue, 8> Operands;
2244       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2245 
2246       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2247                                  Operands);
2248 
2249       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2250       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2251           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2252       MachineSDNode *Store =
2253           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2254       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2255         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2256 
2257       ReplaceNode(Node, Store);
2258       return;
2259     }
2260     case Intrinsic::riscv_sf_vc_x_se:
2261     case Intrinsic::riscv_sf_vc_i_se:
2262       selectSF_VC_X_SE(Node);
2263       return;
2264     }
2265     break;
2266   }
2267   case ISD::BITCAST: {
2268     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2269     // Just drop bitcasts between vectors if both are fixed or both are
2270     // scalable.
2271     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2272         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2273       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2274       CurDAG->RemoveDeadNode(Node);
2275       return;
2276     }
2277     break;
2278   }
2279   case ISD::INSERT_SUBVECTOR:
2280   case RISCVISD::TUPLE_INSERT: {
2281     SDValue V = Node->getOperand(0);
2282     SDValue SubV = Node->getOperand(1);
2283     SDLoc DL(SubV);
2284     auto Idx = Node->getConstantOperandVal(2);
2285     MVT SubVecVT = SubV.getSimpleValueType();
2286 
2287     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2288     MVT SubVecContainerVT = SubVecVT;
2289     // Establish the correct scalable-vector types for any fixed-length type.
2290     if (SubVecVT.isFixedLengthVector()) {
2291       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2292       TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2293       [[maybe_unused]] bool ExactlyVecRegSized =
2294           Subtarget->expandVScale(SubVecVT.getSizeInBits())
2295               .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2296       assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2297                                .getKnownMinValue()));
2298       assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2299     }
2300     MVT ContainerVT = VT;
2301     if (VT.isFixedLengthVector())
2302       ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2303 
2304     const auto *TRI = Subtarget->getRegisterInfo();
2305     unsigned SubRegIdx;
2306     std::tie(SubRegIdx, Idx) =
2307         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2308             ContainerVT, SubVecContainerVT, Idx, TRI);
2309 
2310     // If the Idx hasn't been completely eliminated then this is a subvector
2311     // insert which doesn't naturally align to a vector register. These must
2312     // be handled using instructions to manipulate the vector registers.
2313     if (Idx != 0)
2314       break;
2315 
2316     RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2317     [[maybe_unused]] bool IsSubVecPartReg =
2318         SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2319         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2320         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2321     assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2322             V.isUndef()) &&
2323            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2324            "the subvector is smaller than a full-sized register");
2325 
2326     // If we haven't set a SubRegIdx, then we must be going between
2327     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2328     if (SubRegIdx == RISCV::NoSubRegister) {
2329       unsigned InRegClassID =
2330           RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2331       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2332                  InRegClassID &&
2333              "Unexpected subvector extraction");
2334       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2335       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2336                                                DL, VT, SubV, RC);
2337       ReplaceNode(Node, NewNode);
2338       return;
2339     }
2340 
2341     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2342     ReplaceNode(Node, Insert.getNode());
2343     return;
2344   }
2345   case ISD::EXTRACT_SUBVECTOR:
2346   case RISCVISD::TUPLE_EXTRACT: {
2347     SDValue V = Node->getOperand(0);
2348     auto Idx = Node->getConstantOperandVal(1);
2349     MVT InVT = V.getSimpleValueType();
2350     SDLoc DL(V);
2351 
2352     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2353     MVT SubVecContainerVT = VT;
2354     // Establish the correct scalable-vector types for any fixed-length type.
2355     if (VT.isFixedLengthVector()) {
2356       assert(Idx == 0);
2357       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2358     }
2359     if (InVT.isFixedLengthVector())
2360       InVT = TLI.getContainerForFixedLengthVector(InVT);
2361 
2362     const auto *TRI = Subtarget->getRegisterInfo();
2363     unsigned SubRegIdx;
2364     std::tie(SubRegIdx, Idx) =
2365         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2366             InVT, SubVecContainerVT, Idx, TRI);
2367 
2368     // If the Idx hasn't been completely eliminated then this is a subvector
2369     // extract which doesn't naturally align to a vector register. These must
2370     // be handled using instructions to manipulate the vector registers.
2371     if (Idx != 0)
2372       break;
2373 
2374     // If we haven't set a SubRegIdx, then we must be going between
2375     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2376     if (SubRegIdx == RISCV::NoSubRegister) {
2377       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2378       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2379                  InRegClassID &&
2380              "Unexpected subvector extraction");
2381       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2382       SDNode *NewNode =
2383           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2384       ReplaceNode(Node, NewNode);
2385       return;
2386     }
2387 
2388     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2389     ReplaceNode(Node, Extract.getNode());
2390     return;
2391   }
2392   case RISCVISD::VMV_S_X_VL:
2393   case RISCVISD::VFMV_S_F_VL:
2394   case RISCVISD::VMV_V_X_VL:
2395   case RISCVISD::VFMV_V_F_VL: {
2396     // Try to match splat of a scalar load to a strided load with stride of x0.
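         // e.g. a VMV_V_X_VL whose scalar comes from a load can be selected as
         // a zero-stride "vlse32.v vd, (a0), zero", which reads the scalar
         // once and splats it across the vector.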
2397     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2398                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2399     if (!Node->getOperand(0).isUndef())
2400       break;
2401     SDValue Src = Node->getOperand(1);
2402     auto *Ld = dyn_cast<LoadSDNode>(Src);
2403     // We can't fold an indexed load: its second output (the address
2404     // update) has uses, so the load node can't be removed.
2405     if (!Ld || Ld->isIndexed())
2406       break;
2407     EVT MemVT = Ld->getMemoryVT();
2408     // The memory VT should be the same size as the element type.
2409     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2410       break;
2411     if (!IsProfitableToFold(Src, Node, Node) ||
2412         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2413       break;
2414 
2415     SDValue VL;
2416     // For a scalar move, only handle VL=1 for now. We could deal with more
2417     // VL values if we updated the VSETVLI insertion pass to avoid
2418     // introducing extra VSETVLIs.
2419     if (IsScalarMove && !isOneConstant(Node->getOperand(2)))
2420       break;
2421     selectVLOp(Node->getOperand(2), VL);
2424 
2425     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2426     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2427 
2428     // If VL=1, then we don't need to do a strided load and can just do a
2429     // regular load.
2430     bool IsStrided = !isOneConstant(VL);
2431 
2432     // Only do a strided load if the subtarget has optimized zero-stride loads.
2433     if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2434       break;
2435 
2436     SmallVector<SDValue> Operands = {
2437         SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2438         Ld->getBasePtr()};
2439     if (IsStrided)
2440       Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2441     uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2442     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2443     Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2444 
2445     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2446     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2447         /*IsMasked*/ false, IsStrided, /*FF*/ false,
2448         Log2SEW, static_cast<unsigned>(LMUL));
2449     MachineSDNode *Load =
2450         CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2451     // Update the chain.
2452     ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2453     // Record the mem-refs
2454     CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2455     // Replace the splat with the vlse.
2456     ReplaceNode(Node, Load);
2457     return;
2458   }
2459   case ISD::PREFETCH:
2460     unsigned Locality = Node->getConstantOperandVal(3);
2461     if (Locality > 2)
2462       break;
2463 
2464     if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2465       MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2466       MMO->setFlags(MachineMemOperand::MONonTemporal);
2467 
2468       int NontemporalLevel = 0;
2469       switch (Locality) {
2470       case 0:
2471         NontemporalLevel = 3; // NTL.ALL
2472         break;
2473       case 1:
2474         NontemporalLevel = 1; // NTL.PALL
2475         break;
2476       case 2:
2477         NontemporalLevel = 0; // NTL.P1
2478         break;
2479       default:
2480         llvm_unreachable("unexpected locality value.");
2481       }
2482 
2483       if (NontemporalLevel & 0b1)
2484         MMO->setFlags(MONontemporalBit0);
2485       if (NontemporalLevel & 0b10)
2486         MMO->setFlags(MONontemporalBit1);
2487     }
2488     break;
2489   }
2490 
2491   // Select the default instruction.
2492   SelectCode(Node);
2493 }
2494 
2495 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2496     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2497     std::vector<SDValue> &OutOps) {
2498   // Always produce a register and immediate operand, as expected by
2499   // RISCVAsmPrinter::PrintAsmMemoryOperand.
2500   switch (ConstraintID) {
2501   case InlineAsm::ConstraintCode::o:
2502   case InlineAsm::ConstraintCode::m: {
2503     SDValue Op0, Op1;
2504     [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2505     assert(Found && "SelectAddrRegImm should always succeed");
2506     OutOps.push_back(Op0);
2507     OutOps.push_back(Op1);
2508     return false;
2509   }
2510   case InlineAsm::ConstraintCode::A:
2511     OutOps.push_back(Op);
2512     OutOps.push_back(
2513         CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2514     return false;
2515   default:
2516     report_fatal_error("Unexpected asm memory constraint " +
2517                        InlineAsm::getMemConstraintName(ConstraintID));
2518   }
2519 
2520   return true;
2521 }
2522 
2523 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2524                                              SDValue &Offset) {
2525   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2526     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2527     Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2528     return true;
2529   }
2530 
2531   return false;
2532 }
2533 
2534 // Fold constant addresses.
2535 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2536                                const MVT VT, const RISCVSubtarget *Subtarget,
2537                                SDValue Addr, SDValue &Base, SDValue &Offset,
2538                                bool IsPrefetch = false,
2539                                bool IsRV32Zdinx = false) {
2540   if (!isa<ConstantSDNode>(Addr))
2541     return false;
2542 
2543   int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2544 
2545   // If the constant is a simm12, we can fold the whole constant and use X0 as
2546   // the base. If the constant can be materialized with LUI+simm12, use LUI as
2547   // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
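       // Worked example: CVal=0x12345FFF gives Lo12=-1 and Hi=0x12346000, so
       // we emit LUI with 0x12346 as the base and use -1 as the offset; the
       // LUI constant is rounded up because the simm12 is negative.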
2548   int64_t Lo12 = SignExtend64<12>(CVal);
2549   int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2550   if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2551     if (IsPrefetch && (Lo12 & 0b11111) != 0)
2552       return false;
2553     if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2554       return false;
2555 
2556     if (Hi) {
2557       int64_t Hi20 = (Hi >> 12) & 0xfffff;
2558       Base = SDValue(
2559           CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2560                                  CurDAG->getTargetConstant(Hi20, DL, VT)),
2561           0);
2562     } else {
2563       Base = CurDAG->getRegister(RISCV::X0, VT);
2564     }
2565     Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2566     return true;
2567   }
2568 
2569   // Ask how constant materialization would handle this constant.
2570   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2571 
2572   // If the last instruction would be an ADDI, we can fold its immediate and
2573   // emit the rest of the sequence as the base.
2574   if (Seq.back().getOpcode() != RISCV::ADDI)
2575     return false;
2576   Lo12 = Seq.back().getImm();
2577   if (IsPrefetch && (Lo12 & 0b11111) != 0)
2578     return false;
2579   if (IsRV32Zdinx && !isInt<12>(Lo12 + 4))
2580     return false;
2581 
2582   // Drop the last instruction.
2583   Seq.pop_back();
2584   assert(!Seq.empty() && "Expected more instructions in sequence");
2585 
2586   Base = selectImmSeq(CurDAG, DL, VT, Seq);
2587   Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2588   return true;
2589 }
2590 
2591 // Is this ADD instruction only used as the base pointer of scalar loads and
2592 // stores?
2593 static bool isWorthFoldingAdd(SDValue Add) {
2594   for (auto *User : Add->users()) {
2595     if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2596         User->getOpcode() != ISD::ATOMIC_LOAD &&
2597         User->getOpcode() != ISD::ATOMIC_STORE)
2598       return false;
2599     EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2600     if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2601         VT != MVT::f64)
2602       return false;
2603     // Don't allow stores of the value. It must be used as the address.
2604     if (User->getOpcode() == ISD::STORE &&
2605         cast<StoreSDNode>(User)->getValue() == Add)
2606       return false;
2607     if (User->getOpcode() == ISD::ATOMIC_STORE &&
2608         cast<AtomicSDNode>(User)->getVal() == Add)
2609       return false;
2610   }
2611 
2612   return true;
2613 }
2614 
2615 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2616                                               unsigned MaxShiftAmount,
2617                                               SDValue &Base, SDValue &Index,
2618                                               SDValue &Scale) {
2619   EVT VT = Addr.getSimpleValueType();
2620   auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2621                                               SDValue &Shift) {
2622     uint64_t ShiftAmt = 0;
2623     Index = N;
2624 
2625     if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2626       // Only match shifts by a value in range [0, MaxShiftAmount].
2627       if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2628         Index = N.getOperand(0);
2629         ShiftAmt = N.getConstantOperandVal(1);
2630       }
2631     }
2632 
2633     Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2634     return ShiftAmt != 0;
2635   };
2636 
2637   if (Addr.getOpcode() == ISD::ADD) {
2638     if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2639       SDValue AddrB = Addr.getOperand(0);
2640       if (AddrB.getOpcode() == ISD::ADD &&
2641           UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2642           !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2643           isInt<12>(C1->getSExtValue())) {
2644         // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
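             // e.g. for B + (A << 2) + 8 this emits (ADDI B, 8) as Base and
             // keeps A as Index with Scale == 2, preserving the scaled index.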
2645         SDValue C1Val =
2646             CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2647         Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2648                                               AddrB.getOperand(1), C1Val),
2649                        0);
2650         return true;
2651       }
2652     } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2653       Base = Addr.getOperand(1);
2654       return true;
2655     } else {
2656       UnwrapShl(Addr.getOperand(1), Index, Scale);
2657       Base = Addr.getOperand(0);
2658       return true;
2659     }
2660   } else if (UnwrapShl(Addr, Index, Scale)) {
2661     EVT VT = Addr.getValueType();
2662     Base = CurDAG->getRegister(RISCV::X0, VT);
2663     return true;
2664   }
2665 
2666   return false;
2667 }
2668 
2669 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2670                                          SDValue &Offset, bool IsRV32Zdinx) {
2671   if (SelectAddrFrameIndex(Addr, Base, Offset))
2672     return true;
2673 
2674   SDLoc DL(Addr);
2675   MVT VT = Addr.getSimpleValueType();
2676 
2677   if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2678     // If this is not RV32Zdinx, we can always fold.
2679     if (!IsRV32Zdinx) {
2680       Base = Addr.getOperand(0);
2681       Offset = Addr.getOperand(1);
2682       return true;
2683     }
2684 
2685     // For RV32Zdinx we need more than 4-byte alignment so that we can add 4
2686     // to the offset when we expand it in RISCVExpandPseudoInsts.
2687     if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
2688       const DataLayout &DL = CurDAG->getDataLayout();
2689       Align Alignment = commonAlignment(
2690           GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2691       if (Alignment > 4) {
2692         Base = Addr.getOperand(0);
2693         Offset = Addr.getOperand(1);
2694         return true;
2695       }
2696     }
2697     if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) {
2698       Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
2699       if (Alignment > 4) {
2700         Base = Addr.getOperand(0);
2701         Offset = Addr.getOperand(1);
2702         return true;
2703       }
2704     }
2705   }
2706 
2707   int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0;
2708   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2709     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2710     if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2711       Base = Addr.getOperand(0);
2712       if (Base.getOpcode() == RISCVISD::ADD_LO) {
2713         SDValue LoOperand = Base.getOperand(1);
2714         if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2715           // If the Lo in (ADD_LO hi, lo) is a global variable's address
2716           // (its low part, really), then we can rely on the alignment of that
2717           // variable to provide a margin of safety before the low part can
2718           // overflow the 12 bits of the load/store offset. Check whether CVal
2719           // falls within that margin; if so, (low part + CVal) can't overflow.
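               // e.g. with 8-byte alignment the low part is a multiple of 8
               // (at most 2040), so adding any positive CVal < 8 stays simm12.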
2720           const DataLayout &DL = CurDAG->getDataLayout();
2721           Align Alignment = commonAlignment(
2722               GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2723           if ((CVal == 0 || Alignment > CVal) &&
2724               (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) {
2725             int64_t CombinedOffset = CVal + GA->getOffset();
2726             Base = Base.getOperand(0);
2727             Offset = CurDAG->getTargetGlobalAddress(
2728                 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2729                 CombinedOffset, GA->getTargetFlags());
2730             return true;
2731           }
2732         }
2733       }
2734 
2735       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2736         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2737       Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2738       return true;
2739     }
2740   }
2741 
2742   // Handle ADD with large immediates.
2743   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2744     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2745     assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2746            "simm12 not already handled?");
2747 
2748     // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2749     // an ADDI for part of the offset and fold the rest into the load/store.
2750     // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
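         // e.g. CVal == 3000 becomes (ADDI base, 2047) plus a load/store
         // offset of 953; both pieces are valid simm12 immediates.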
2751     if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) {
2752       int64_t Adj = CVal < 0 ? -2048 : 2047;
2753       Base = SDValue(
2754           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2755                                  CurDAG->getSignedTargetConstant(Adj, DL, VT)),
2756           0);
2757       Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
2758       return true;
2759     }
2760 
2761     // For larger immediates, we might be able to save one instruction from
2762     // constant materialization by folding the Lo12 bits of the immediate into
2763     // the address. We should only do this if the ADD is only used by loads and
2764     // stores that can fold the lo12 bits. Otherwise, the ADD will be selected
2765     // separately with the full immediate materialized, creating extra
2766     // instructions.
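         // e.g. (add X, 0x12345678): selectConstantAddr returns (LUI 0x12345)
         // and offset 0x678; we then ADD X to the LUI result and let the
         // load/store consume the 0x678 offset.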
2767     if (isWorthFoldingAdd(Addr) &&
2768         selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2769                            Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) {
2770       // Insert an ADD instruction with the materialized Hi52 bits.
2771       Base = SDValue(
2772           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2773           0);
2774       return true;
2775     }
2776   }
2777 
2778   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2779                          /*IsPrefetch=*/false, RV32ZdinxRange))
2780     return true;
2781 
2782   Base = Addr;
2783   Offset = CurDAG->getTargetConstant(0, DL, VT);
2784   return true;
2785 }
2786 
2787 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2788 /// Offset should be all zeros.
2789 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2790                                                  SDValue &Offset) {
2791   if (SelectAddrFrameIndex(Addr, Base, Offset))
2792     return true;
2793 
2794   SDLoc DL(Addr);
2795   MVT VT = Addr.getSimpleValueType();
2796 
2797   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2798     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2799     if (isInt<12>(CVal)) {
2800       Base = Addr.getOperand(0);
2801 
2802       // Early-out if not a valid offset.
2803       if ((CVal & 0b11111) != 0) {
2804         Base = Addr;
2805         Offset = CurDAG->getTargetConstant(0, DL, VT);
2806         return true;
2807       }
2808 
2809       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2810         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2811       Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2812       return true;
2813     }
2814   }
2815 
2816   // Handle ADD with large immediates.
2817   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2818     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2819     assert(!isInt<12>(CVal) && "simm12 not already handled?");
2821 
2822     // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2823     // one instruction by folding an adjustment of -2048 or 2016 into the address.
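         // e.g. CVal == 2100 becomes (ADDI base, 84) with offset 2016, which
         // keeps its low five bits clear as required here.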
2824     if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2825       int64_t Adj = CVal < 0 ? -2048 : 2016;
2826       int64_t AdjustedOffset = CVal - Adj;
2827       Base =
2828           SDValue(CurDAG->getMachineNode(
2829                       RISCV::ADDI, DL, VT, Addr.getOperand(0),
2830                       CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
2831                   0);
2832       Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
2833       return true;
2834     }
2835 
2836     if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2837                            Offset, /*IsPrefetch=*/true)) {
2838       // Insert an ADD instruction with the materialized Hi52 bits.
2839       Base = SDValue(
2840           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2841           0);
2842       return true;
2843     }
2844   }
2845 
2846   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2847                          /*IsPrefetch=*/true))
2848     return true;
2849 
2850   Base = Addr;
2851   Offset = CurDAG->getTargetConstant(0, DL, VT);
2852   return true;
2853 }
2854 
2855 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2856                                          SDValue &Offset) {
2857   if (Addr.getOpcode() != ISD::ADD)
2858     return false;
2859 
2860   if (isa<ConstantSDNode>(Addr.getOperand(1)))
2861     return false;
2862 
2863   Base = Addr.getOperand(1);
2864   Offset = Addr.getOperand(0);
2865   return true;
2866 }
2867 
2868 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2869                                         SDValue &ShAmt) {
2870   ShAmt = N;
2871 
2872   // Peek through zext.
2873   if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2874     ShAmt = ShAmt.getOperand(0);
2875 
2876   // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2877   // amount. If there is an AND on the shift amount, we can bypass it if it
2878   // doesn't affect any of those bits.
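       // e.g. on RV64, (srl X, (and Y, 63)) can shift by Y directly, since SRL
       // already ignores all but the low six bits of the shift amount.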
2879   if (ShAmt.getOpcode() == ISD::AND &&
2880       isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2881     const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2882 
2883     // Since the max shift amount is a power of 2 we can subtract 1 to make a
2884     // mask that covers the bits needed to represent all shift amounts.
2885     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2886     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2887 
2888     if (ShMask.isSubsetOf(AndMask)) {
2889       ShAmt = ShAmt.getOperand(0);
2890     } else {
2891       // SimplifyDemandedBits may have optimized the mask so try restoring any
2892       // bits that are known zero.
2893       KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2894       if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2895         return true;
2896       ShAmt = ShAmt.getOperand(0);
2897     }
2898   }
2899 
2900   if (ShAmt.getOpcode() == ISD::ADD &&
2901       isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2902     uint64_t Imm = ShAmt.getConstantOperandVal(1);
2903     // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2904     // to avoid the ADD.
2905     if (Imm != 0 && Imm % ShiftWidth == 0) {
2906       ShAmt = ShAmt.getOperand(0);
2907       return true;
2908     }
2909   } else if (ShAmt.getOpcode() == ISD::SUB &&
2910              isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2911     uint64_t Imm = ShAmt.getConstantOperandVal(0);
2912     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2913     // generate a NEG instead of a SUB of a constant.
2914     if (Imm != 0 && Imm % ShiftWidth == 0) {
2915       SDLoc DL(ShAmt);
2916       EVT VT = ShAmt.getValueType();
2917       SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2918       unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2919       MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2920                                                   ShAmt.getOperand(1));
2921       ShAmt = SDValue(Neg, 0);
2922       return true;
2923     }
2924     // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2925     // to generate a NOT instead of a SUB of a constant.
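         // This works because ~X == -X - 1, so N - X == ~X (mod Size) whenever
         // N % Size == Size - 1, e.g. 31 - X == ~X (mod 32).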
2926     if (Imm % ShiftWidth == ShiftWidth - 1) {
2927       SDLoc DL(ShAmt);
2928       EVT VT = ShAmt.getValueType();
2929       MachineSDNode *Not = CurDAG->getMachineNode(
2930           RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2931           CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
2932       ShAmt = SDValue(Not, 0);
2933       return true;
2934     }
2935   }
2936 
2937   return true;
2938 }
2939 
2940 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2941 /// check for equality with 0. This function emits instructions that convert the
2942 /// seteq/setne into something that can be compared with 0.
2943 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2944 /// ISD::SETNE).
2945 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2946                                     SDValue &Val) {
2947   assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2948          "Unexpected condition code!");
2949 
2950   // We're looking for a setcc.
2951   if (N->getOpcode() != ISD::SETCC)
2952     return false;
2953 
2954   // Must be an equality comparison.
2955   ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2956   if (CCVal != ExpectedCCVal)
2957     return false;
2958 
2959   SDValue LHS = N->getOperand(0);
2960   SDValue RHS = N->getOperand(1);
2961 
2962   if (!LHS.getValueType().isScalarInteger())
2963     return false;
2964 
2965   // If the RHS is 0, we don't need any extra instructions; return the LHS.
2966   if (isNullConstant(RHS)) {
2967     Val = LHS;
2968     return true;
2969   }
2970 
2971   SDLoc DL(N);
2972 
2973   if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2974     int64_t CVal = C->getSExtValue();
2975     // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2976     // non-zero otherwise.
2977     if (CVal == -2048) {
2978       Val = SDValue(
2979           CurDAG->getMachineNode(
2980               RISCV::XORI, DL, N->getValueType(0), LHS,
2981               CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
2982           0);
2983       return true;
2984     }
2985     // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2986     // LHS is equal to the RHS and non-zero otherwise.
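         // e.g. (seteq X, 5) becomes (ADDI X, -5), which is zero iff X == 5.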
2987     if (isInt<12>(CVal) || CVal == 2048) {
2988       Val = SDValue(
2989           CurDAG->getMachineNode(
2990               RISCV::ADDI, DL, N->getValueType(0), LHS,
2991               CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
2992           0);
2993       return true;
2994     }
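         // With Zbs, comparing against a power of 2 can instead invert that
         // single bit: e.g. (seteq X, 16) becomes (BINVI X, 4), which is zero
         // iff X == 16.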
2995     if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
2996       Val = SDValue(
2997           CurDAG->getMachineNode(
2998               RISCV::BINVI, DL, N->getValueType(0), LHS,
2999               CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3000           0);
3001       return true;
3002     }
3003   }
3004 
3005   // If nothing else, we can XOR the LHS and RHS to produce zero if they are
3006   // equal and a non-zero value if they aren't.
3007   Val = SDValue(
3008       CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3009   return true;
3010 }
3011 
3012 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3013   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3014       cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3015     Val = N.getOperand(0);
3016     return true;
3017   }
3018 
3019   auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3020     if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3021       return N;
3022 
3023     SDValue N0 = N.getOperand(0);
3024     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3025         N.getConstantOperandVal(1) == ShiftAmt &&
3026         N0.getConstantOperandVal(1) == ShiftAmt)
3027       return N0.getOperand(0);
3028 
3029     return N;
3030   };
3031 
3032   MVT VT = N.getSimpleValueType();
3033   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3034     Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3035     return true;
3036   }
3037 
3038   return false;
3039 }
3040 
3041 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3042   if (N.getOpcode() == ISD::AND) {
3043     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3044     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3045       Val = N.getOperand(0);
3046       return true;
3047     }
3048   }
3049   MVT VT = N.getSimpleValueType();
3050   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3051   if (CurDAG->MaskedValueIsZero(N, Mask)) {
3052     Val = N;
3053     return true;
3054   }
3055 
3056   return false;
3057 }
3058 
3059 /// Look for various patterns that can be done with a SHL that can be folded
3060 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3061 /// SHXADD we are trying to match.
3062 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3063                                        SDValue &Val) {
3064   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3065     SDValue N0 = N.getOperand(0);
3066 
3067     if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3068         (LeftShift || N0.getOpcode() == ISD::SRL) &&
3069         isa<ConstantSDNode>(N0.getOperand(1))) {
3070       uint64_t Mask = N.getConstantOperandVal(1);
3071       unsigned C2 = N0.getConstantOperandVal(1);
3072 
3073       unsigned XLen = Subtarget->getXLen();
3074       if (LeftShift)
3075         Mask &= maskTrailingZeros<uint64_t>(C2);
3076       else
3077         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3078 
3079       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3080       // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3081       // followed by a SHXADD with c3 for the X amount.
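           // e.g. for ShAmt == 3, (and (shl y, 1), 0xFFFFFFFFFFFFFFF8) has
           // Leading == 0 and Trailing == 3, so (SRLI y, 2) followed by SH3ADD
           // reproduces the value.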
3082       if (isShiftedMask_64(Mask)) {
3083         unsigned Leading = XLen - llvm::bit_width(Mask);
3084         unsigned Trailing = llvm::countr_zero(Mask);
3085         if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3086           SDLoc DL(N);
3087           EVT VT = N.getValueType();
3088           Val = SDValue(CurDAG->getMachineNode(
3089                             RISCV::SRLI, DL, VT, N0.getOperand(0),
3090                             CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3091                         0);
3092           return true;
3093         }
3094         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3095         // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3096         // followed by a SHXADD using c3 for the X amount.
3097         if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3098           SDLoc DL(N);
3099           EVT VT = N.getValueType();
3100           Val = SDValue(
3101               CurDAG->getMachineNode(
3102                   RISCV::SRLI, DL, VT, N0.getOperand(0),
3103                   CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3104               0);
3105           return true;
3106         }
3107       }
3108     } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3109                isa<ConstantSDNode>(N0.getOperand(1))) {
3110       uint64_t Mask = N.getConstantOperandVal(1);
3111       unsigned C2 = N0.getConstantOperandVal(1);
3112 
3113       // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3114       // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3115       // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3116       // the X amount.
3117       if (isShiftedMask_64(Mask)) {
3118         unsigned XLen = Subtarget->getXLen();
3119         unsigned Leading = XLen - llvm::bit_width(Mask);
3120         unsigned Trailing = llvm::countr_zero(Mask);
3121         if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3122           SDLoc DL(N);
3123           EVT VT = N.getValueType();
3124           Val = SDValue(CurDAG->getMachineNode(
3125                             RISCV::SRAI, DL, VT, N0.getOperand(0),
3126                             CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3127                         0);
3128           Val = SDValue(CurDAG->getMachineNode(
3129                             RISCV::SRLI, DL, VT, Val,
3130                             CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3131                         0);
3132           return true;
3133         }
3134       }
3135     }
3136   } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3137              (LeftShift || N.getOpcode() == ISD::SRL) &&
3138              isa<ConstantSDNode>(N.getOperand(1))) {
3139     SDValue N0 = N.getOperand(0);
3140     if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3141         isa<ConstantSDNode>(N0.getOperand(1))) {
3142       uint64_t Mask = N0.getConstantOperandVal(1);
3143       if (isShiftedMask_64(Mask)) {
3144         unsigned C1 = N.getConstantOperandVal(1);
3145         unsigned XLen = Subtarget->getXLen();
3146         unsigned Leading = XLen - llvm::bit_width(Mask);
3147         unsigned Trailing = llvm::countr_zero(Mask);
3148         // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3149         // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3150         if (LeftShift && Leading == 32 && Trailing > 0 &&
3151             (Trailing + C1) == ShAmt) {
3152           SDLoc DL(N);
3153           EVT VT = N.getValueType();
3154           Val = SDValue(CurDAG->getMachineNode(
3155                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
3156                             CurDAG->getTargetConstant(Trailing, DL, VT)),
3157                         0);
3158           return true;
3159         }
3160         // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3161         // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3162         if (!LeftShift && Leading == 32 && Trailing > C1 &&
3163             (Trailing - C1) == ShAmt) {
3164           SDLoc DL(N);
3165           EVT VT = N.getValueType();
3166           Val = SDValue(CurDAG->getMachineNode(
3167                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
3168                             CurDAG->getTargetConstant(Trailing, DL, VT)),
3169                         0);
3170           return true;
3171         }
3172       }
3173     }
3174   }
3175 
3176   return false;
3177 }
3178 
3179 /// Look for various patterns that can be done with a SHL that can be folded
3180 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3181 /// SHXADD_UW we are trying to match.
3182 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3183                                           SDValue &Val) {
3184   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3185       N.hasOneUse()) {
3186     SDValue N0 = N.getOperand(0);
3187     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3188         N0.hasOneUse()) {
3189       uint64_t Mask = N.getConstantOperandVal(1);
3190       unsigned C2 = N0.getConstantOperandVal(1);
3191 
3192       Mask &= maskTrailingZeros<uint64_t>(C2);
3193 
3194       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3195       // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3196       // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
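           // e.g. for ShAmt == 2, (and (shl y, 4), 0x3FFFFFFF0) becomes
           // (SLLI y, 2) followed by SH2ADD.UW, since
           // zext32(y << 2) << 2 == (y << 4) & 0x3FFFFFFF0.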
3197       if (isShiftedMask_64(Mask)) {
3198         unsigned Leading = llvm::countl_zero(Mask);
3199         unsigned Trailing = llvm::countr_zero(Mask);
3200         if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3201           SDLoc DL(N);
3202           EVT VT = N.getValueType();
3203           Val = SDValue(CurDAG->getMachineNode(
3204                             RISCV::SLLI, DL, VT, N0.getOperand(0),
3205                             CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3206                         0);
3207           return true;
3208         }
3209       }
3210     }
3211   }
3212 
3213   return false;
3214 }
3215 
3216 bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3217   if (!isa<ConstantSDNode>(N))
3218     return false;
3219   int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3220 
3221   // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
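       // e.g. Imm == 0x7fffffff: ~Imm == 0xffffffff80000000 is a single LUI,
       // so (and X, Imm) can become (ANDN X, (LUI 0x80000)), saving one
       // instruction.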
3222   if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3223     return false;
3224 
3225   // Abandon this transform if the constant is needed elsewhere.
3226   for (const SDNode *U : N->users()) {
3227     switch (U->getOpcode()) {
3228     case ISD::AND:
3229     case ISD::OR:
3230     case ISD::XOR:
3231       if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3232         return false;
3233       break;
3234     case RISCVISD::VMV_V_X_VL:
3235       if (!Subtarget->hasStdExtZvkb())
3236         return false;
3237       if (!all_of(U->users(), [](const SDNode *V) {
3238             return V->getOpcode() == ISD::AND ||
3239                    V->getOpcode() == RISCVISD::AND_VL;
3240           }))
3241         return false;
3242       break;
3243     default:
3244       return false;
3245     }
3246   }
3247 
3248   // For 64-bit constants, the instruction sequences get complex, so select the
3249   // inverted constant only if it is cheaper to materialize.
3250   if (!isInt<32>(Imm)) {
3251     int OrigImmCost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3252                                                  /*CompressionCost=*/true);
3253     int NegImmCost = RISCVMatInt::getIntMatCost(APInt(64, ~Imm), 64, *Subtarget,
3254                                                 /*CompressionCost=*/true);
3255     if (OrigImmCost <= NegImmCost)
3256       return false;
3257   }
3258 
3259   Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3260   return true;
3261 }
3262 
3263 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3264                                         unsigned Bits,
3265                                         const TargetInstrInfo *TII) {
3266   unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3267 
3268   if (!MCOpcode)
3269     return false;
3270 
3271   const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3272   const uint64_t TSFlags = MCID.TSFlags;
3273   if (!RISCVII::hasSEWOp(TSFlags))
3274     return false;
3275   assert(RISCVII::hasVLOp(TSFlags));
3276 
3277   bool HasGlueOp = User->getGluedNode() != nullptr;
3278   unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3279   bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3280   bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3281   unsigned VLIdx =
3282       User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3283   const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3284 
3285   if (UserOpNo == VLIdx)
3286     return false;
3287 
3288   auto NumDemandedBits =
3289       RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3290   return NumDemandedBits && Bits >= *NumDemandedBits;
3291 }
3292 
3293 // Return true if all users of this SDNode* only consume the lower \p Bits.
3294 // This can be used to form W instructions for add/sub/mul/shl even when the
3295 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3296 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
3297 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3298 // the add/sub/mul/shl to become non-W instructions. By checking the users we
3299 // may be able to use a W instruction and CSE with the other instruction if
3300 // this has happened. We could try to detect that the CSE opportunity exists
3301 // before doing this, but that would be more complicated.
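     // For example, an (add X, Y) whose only user is the value operand of an SW
     // has only its low 32 bits observed, so it can safely be selected as ADDW.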
3302 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3303                                         const unsigned Depth) const {
3304   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3305           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3306           Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3307           Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3308           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3309           isa<ConstantSDNode>(Node) || Depth != 0) &&
3310          "Unexpected opcode");
3311 
3312   if (Depth >= SelectionDAG::MaxRecursionDepth)
3313     return false;
3314 
3315   // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3316   // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3317   if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3318     return false;
3319 
3320   for (SDUse &Use : Node->uses()) {
3321     SDNode *User = Use.getUser();
3322     // Users of this node should have already been instruction selected.
3323     if (!User->isMachineOpcode())
3324       return false;
3325 
3326     // TODO: Add more opcodes?
3327     switch (User->getMachineOpcode()) {
3328     default:
3329       if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
3330         break;
3331       return false;
3332     case RISCV::ADDW:
3333     case RISCV::ADDIW:
3334     case RISCV::SUBW:
3335     case RISCV::MULW:
3336     case RISCV::SLLW:
3337     case RISCV::SLLIW:
3338     case RISCV::SRAW:
3339     case RISCV::SRAIW:
3340     case RISCV::SRLW:
3341     case RISCV::SRLIW:
3342     case RISCV::DIVW:
3343     case RISCV::DIVUW:
3344     case RISCV::REMW:
3345     case RISCV::REMUW:
3346     case RISCV::ROLW:
3347     case RISCV::RORW:
3348     case RISCV::RORIW:
3349     case RISCV::CLZW:
3350     case RISCV::CTZW:
3351     case RISCV::CPOPW:
3352     case RISCV::SLLI_UW:
3353     case RISCV::FMV_W_X:
3354     case RISCV::FCVT_H_W:
3355     case RISCV::FCVT_H_W_INX:
3356     case RISCV::FCVT_H_WU:
3357     case RISCV::FCVT_H_WU_INX:
3358     case RISCV::FCVT_S_W:
3359     case RISCV::FCVT_S_W_INX:
3360     case RISCV::FCVT_S_WU:
3361     case RISCV::FCVT_S_WU_INX:
3362     case RISCV::FCVT_D_W:
3363     case RISCV::FCVT_D_W_INX:
3364     case RISCV::FCVT_D_WU:
3365     case RISCV::FCVT_D_WU_INX:
3366     case RISCV::TH_REVW:
3367     case RISCV::TH_SRRIW:
3368       if (Bits >= 32)
3369         break;
3370       return false;
3371     case RISCV::SLL:
3372     case RISCV::SRA:
3373     case RISCV::SRL:
3374     case RISCV::ROL:
3375     case RISCV::ROR:
3376     case RISCV::BSET:
3377     case RISCV::BCLR:
3378     case RISCV::BINV:
3379       // Shift amount operands only use log2(XLen) bits.
3380       if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3381         break;
3382       return false;
3383     case RISCV::SLLI:
3384       // SLLI only uses the lower (XLen - ShAmt) bits.
3385       if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3386         break;
3387       return false;
3388     case RISCV::ANDI:
3389       if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3390         break;
3391       goto RecCheck;
3392     case RISCV::ORI: {
3393       uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3394       if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3395         break;
3396       [[fallthrough]];
3397     }
3398     case RISCV::AND:
3399     case RISCV::OR:
3400     case RISCV::XOR:
3401     case RISCV::XORI:
3402     case RISCV::ANDN:
3403     case RISCV::ORN:
3404     case RISCV::XNOR:
3405     case RISCV::SH1ADD:
3406     case RISCV::SH2ADD:
3407     case RISCV::SH3ADD:
3408     RecCheck:
3409       if (hasAllNBitUsers(User, Bits, Depth + 1))
3410         break;
3411       return false;
3412     case RISCV::SRLI: {
3413       unsigned ShAmt = User->getConstantOperandVal(1);
3414       // If we are shifting right by less than Bits, and users don't demand any
3415       // bits that were shifted into [Bits-1:0], then we can consider this as an
3416       // N-Bit user.
3417       if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3418         break;
3419       return false;
3420     }
3421     case RISCV::SEXT_B:
3422     case RISCV::PACKH:
3423       if (Bits >= 8)
3424         break;
3425       return false;
3426     case RISCV::SEXT_H:
3427     case RISCV::FMV_H_X:
3428     case RISCV::ZEXT_H_RV32:
3429     case RISCV::ZEXT_H_RV64:
3430     case RISCV::PACKW:
3431       if (Bits >= 16)
3432         break;
3433       return false;
3434     case RISCV::PACK:
3435       if (Bits >= (Subtarget->getXLen() / 2))
3436         break;
3437       return false;
3438     case RISCV::ADD_UW:
3439     case RISCV::SH1ADD_UW:
3440     case RISCV::SH2ADD_UW:
3441     case RISCV::SH3ADD_UW:
3442       // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3443       // 32 bits.
3444       if (Use.getOperandNo() == 0 && Bits >= 32)
3445         break;
3446       return false;
3447     case RISCV::SB:
3448       if (Use.getOperandNo() == 0 && Bits >= 8)
3449         break;
3450       return false;
3451     case RISCV::SH:
3452       if (Use.getOperandNo() == 0 && Bits >= 16)
3453         break;
3454       return false;
3455     case RISCV::SW:
3456       if (Use.getOperandNo() == 0 && Bits >= 32)
3457         break;
3458       return false;
3459     }
3460   }
3461 
3462   return true;
3463 }
3464 
3465 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
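     // e.g. -64 is encodable as (-16 << 2), giving Simm5 == -16 and Shl2 == 2.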
3466 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3467                                         SDValue &Shl2) {
3468   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3469     int64_t Offset = C->getSExtValue();
3470     unsigned Shift;
3471     for (Shift = 0; Shift < 4; Shift++)
3472       if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3473         break;
3474 
3475     // Constant cannot be encoded.
3476     if (Shift == 4)
3477       return false;
3478 
3479     EVT Ty = N->getValueType(0);
3480     Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3481     Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3482     return true;
3483   }
3484 
3485   return false;
3486 }
3487 
3488 // Select VL as a 5-bit immediate or a value that will become a register. This
3489 // allows us to choose between VSETIVLI and VSETVLI later.
3490 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3491   auto *C = dyn_cast<ConstantSDNode>(N);
3492   if (C && isUInt<5>(C->getZExtValue())) {
3493     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3494                                    N->getValueType(0));
3495   } else if (C && C->isAllOnes()) {
3496     // Treat all ones as VLMax.
3497     VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3498                                          N->getValueType(0));
3499   } else if (isa<RegisterSDNode>(N) &&
3500              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3501     // All our VL operands use an operand that allows GPRNoX0 or an immediate
3502     // as the register class. Convert X0 to a special immediate to pass the
3503     // MachineVerifier. This is recognized specially by the vsetvli insertion
3504     // pass.
3505     VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3506                                          N->getValueType(0));
3507   } else {
3508     VL = N;
3509   }
3510 
3511   return true;
3512 }
3513 
3514 static SDValue findVSplat(SDValue N) {
3515   if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3516     if (!N.getOperand(0).isUndef())
3517       return SDValue();
3518     N = N.getOperand(1);
3519   }
3520   SDValue Splat = N;
3521   if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3522        Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3523       !Splat.getOperand(0).isUndef())
3524     return SDValue();
3525   assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3526   return Splat;
3527 }
3528 
3529 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3530   SDValue Splat = findVSplat(N);
3531   if (!Splat)
3532     return false;
3533 
3534   SplatVal = Splat.getOperand(1);
3535   return true;
3536 }
3537 
3538 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3539                                   SelectionDAG &DAG,
3540                                   const RISCVSubtarget &Subtarget,
3541                                   std::function<bool(int64_t)> ValidateImm) {
3542   SDValue Splat = findVSplat(N);
3543   if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3544     return false;
3545 
3546   const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3547   assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3548          "Unexpected splat operand type");
3549 
3550   // The semantics of RISCVISD::VMV_V_X_VL are that when the operand
3551   // type is wider than the resulting vector element type, an implicit
3552   // truncation first takes place. Therefore, perform a manual
3553   // truncation/sign-extension in order to ignore any truncated bits and catch
3554   // any zero-extended immediate.
3555   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3556   // sign-extending to (XLenVT -1).
3557   APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3558 
3559   int64_t SplatImm = SplatConst.getSExtValue();
3560 
3561   if (!ValidateImm(SplatImm))
3562     return false;
3563 
3564   SplatVal =
3565       DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3566   return true;
3567 }
3568 
3569 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3570   return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3571                                [](int64_t Imm) { return isInt<5>(Imm); });
3572 }
3573 
3574 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3575   return selectVSplatImmHelper(
3576       N, SplatVal, *CurDAG, *Subtarget,
3577       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3578 }
3579 
3580 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3581                                                       SDValue &SplatVal) {
3582   return selectVSplatImmHelper(
3583       N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3584         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3585       });
3586 }
3587 
3588 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3589                                          SDValue &SplatVal) {
3590   return selectVSplatImmHelper(
3591       N, SplatVal, *CurDAG, *Subtarget,
3592       [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3593 }
3594 
3595 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3596   auto IsExtOrTrunc = [](SDValue N) {
3597     switch (N->getOpcode()) {
3598     case ISD::SIGN_EXTEND:
3599     case ISD::ZERO_EXTEND:
3600     // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3601     // inactive elements will be undef.
3602     case RISCVISD::TRUNCATE_VECTOR_VL:
3603     case RISCVISD::VSEXT_VL:
3604     case RISCVISD::VZEXT_VL:
3605       return true;
3606     default:
3607       return false;
3608     }
3609   };
3610 
3611   // We can have multiple nested nodes, so unravel them all if needed.
3612   while (IsExtOrTrunc(N)) {
3613     if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3614       return false;
3615     N = N->getOperand(0);
3616   }
3617 
3618   return selectVSplat(N, SplatVal);
3619 }
3620 
3621 bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
3622   // Allow bitcasts from XLenVT -> FP.
3623   if (N.getOpcode() == ISD::BITCAST &&
3624       N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
3625     Imm = N.getOperand(0);
3626     return true;
3627   }
3628   // Allow moves from XLenVT to FP.
3629   if (N.getOpcode() == RISCVISD::FMV_H_X ||
3630       N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
3631     Imm = N.getOperand(0);
3632     return true;
3633   }
3634 
3635   // Otherwise, look for FP constants that can be materialized with a scalar int.
3636   ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3637   if (!CFP)
3638     return false;
3639   const APFloat &APF = CFP->getValueAPF();
3640   // td can handle +0.0 already.
3641   if (APF.isPosZero())
3642     return false;
3643 
3644   MVT VT = CFP->getSimpleValueType(0);
3645 
3646   MVT XLenVT = Subtarget->getXLenVT();
3647   if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3648     assert(APF.isNegZero() && "Unexpected constant.");
3649     return false;
3650   }
3651   SDLoc DL(N);
3652   Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3653                   *Subtarget);
3654   return true;
3655 }
3656 
3657 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3658                                        SDValue &Imm) {
3659   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3660     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3661 
3662     if (!isInt<5>(ImmVal))
3663       return false;
3664 
3665     Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
3666                                           Subtarget->getXLenVT());
3667     return true;
3668   }
3669 
3670   return false;
3671 }
3672 
3673 // Try to remove sext.w if the input is a W instruction or can be made into
3674 // a W instruction cheaply.
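     // e.g. (ADDIW (ADD X, Y), 0) is replaced by (ADDW X, Y), eliminating the
     // sext.w entirely.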
3675 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3676   // Look for the sext.w pattern, addiw rd, rs1, 0.
3677   if (N->getMachineOpcode() != RISCV::ADDIW ||
3678       !isNullConstant(N->getOperand(1)))
3679     return false;
3680 
3681   SDValue N0 = N->getOperand(0);
3682   if (!N0.isMachineOpcode())
3683     return false;
3684 
3685   switch (N0.getMachineOpcode()) {
3686   default:
3687     break;
3688   case RISCV::ADD:
3689   case RISCV::ADDI:
3690   case RISCV::SUB:
3691   case RISCV::MUL:
3692   case RISCV::SLLI: {
3693     // Convert sext.w+add/sub/mul to their W instructions. This will create
3694     // a new independent instruction. This improves latency.
3695     unsigned Opc;
3696     switch (N0.getMachineOpcode()) {
3697     default:
3698       llvm_unreachable("Unexpected opcode!");
3699     case RISCV::ADD:  Opc = RISCV::ADDW;  break;
3700     case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3701     case RISCV::SUB:  Opc = RISCV::SUBW;  break;
3702     case RISCV::MUL:  Opc = RISCV::MULW;  break;
3703     case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3704     }
3705 
3706     SDValue N00 = N0.getOperand(0);
3707     SDValue N01 = N0.getOperand(1);
3708 
3709     // Shift amount needs to be uimm5.
3710     if (N0.getMachineOpcode() == RISCV::SLLI &&
3711         !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3712       break;
3713 
3714     SDNode *Result =
3715         CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3716                                N00, N01);
3717     ReplaceUses(N, Result);
3718     return true;
3719   }
3720   case RISCV::ADDW:
3721   case RISCV::ADDIW:
3722   case RISCV::SUBW:
3723   case RISCV::MULW:
3724   case RISCV::SLLIW:
3725   case RISCV::PACKW:
3726   case RISCV::TH_MULAW:
3727   case RISCV::TH_MULAH:
3728   case RISCV::TH_MULSW:
3729   case RISCV::TH_MULSH:
3730     if (N0.getValueType() == MVT::i32)
3731       break;
3732 
3733     // Result is already sign extended; just remove the sext.w.
3734     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3735     ReplaceUses(N, N0.getNode());
3736     return true;
3737   }
3738 
3739   return false;
3740 }
3741 
3742 // After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3743 // that's glued to the pseudo. This tries to look up the value that was copied
3744 // to V0.
3745 static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3746   // Check that we're using V0 as a mask register.
3747   if (!isa<RegisterSDNode>(MaskOp) ||
3748       cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3749     return SDValue();
3750 
3751   // The glued user defines V0.
3752   const auto *Glued = GlueOp.getNode();
3753 
3754   if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3755     return SDValue();
3756 
3757   // Check that we're defining V0 as a mask register.
3758   if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3759       cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3760     return SDValue();
3761 
3762   SDValue MaskSetter = Glued->getOperand(2);
3763 
3764   // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3765   // from an extract_subvector or insert_subvector.
3766   if (MaskSetter->isMachineOpcode() &&
3767       MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3768     MaskSetter = MaskSetter->getOperand(0);
3769 
3770   return MaskSetter;
3771 }
3772 
3773 static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3774   // Check the instruction defining V0; it needs to be a VMSET pseudo.
3775   SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3776   if (!MaskSetter)
3777     return false;
3778 
3779   const auto IsVMSet = [](unsigned Opc) {
3780     return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3781            Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3782            Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3783            Opc == RISCV::PseudoVMSET_M_B8;
3784   };
3785 
3786   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3787   // undefined behaviour if it's the wrong bitwidth, so we could choose to
3788   // assume that it's all-ones? Same applies to its VL.
3789   return MaskSetter->isMachineOpcode() &&
3790          IsVMSet(MaskSetter.getMachineOpcode());
3791 }
3792 
3793 // Return true if we can make sure mask of N is all-ones mask.
3794 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3795   return usesAllOnesMask(N->getOperand(MaskOpIdx),
3796                          N->getOperand(N->getNumOperands() - 1));
3797 }
3798 
3799 static bool isImplicitDef(SDValue V) {
3800   if (!V.isMachineOpcode())
3801     return false;
3802   if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3803     for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3804       if (!isImplicitDef(V.getOperand(I)))
3805         return false;
3806     return true;
3807   }
3808   return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3809 }
3810 
3811 // Optimize masked RVV pseudo instructions with a known all-ones mask to their
3812 // corresponding "unmasked" pseudo versions. The mask we're interested in will
3813 // take the form of a V0 physical register operand, with a glued
3814 // register-setting instruction.
3815 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3816   const RISCV::RISCVMaskedPseudoInfo *I =
3817       RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3818   if (!I)
3819     return false;
3820 
3821   unsigned MaskOpIdx = I->MaskOpIdx;
3822   if (!usesAllOnesMask(N, MaskOpIdx))
3823     return false;
3824 
3825   // There are two classes of pseudos in the table - compares and
3826   // everything else.  See the comment on RISCVMaskedPseudo for details.
3827   const unsigned Opc = I->UnmaskedPseudo;
3828   const MCInstrDesc &MCID = TII->get(Opc);
3829   const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
3830 
3831   const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3832   const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
3833 
3834   assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3835          RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3836          "Masked and unmasked pseudos are inconsistent");
3837   assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
3838          "Unexpected pseudo structure");
3839   assert(!(HasPassthru && !MaskedHasPassthru) &&
3840          "Unmasked pseudo has passthru but masked pseudo doesn't?");
3841 
3842   SmallVector<SDValue, 8> Ops;
3843   // Skip the passthru operand at index 0 if the unmasked pseudo lacks one.
3844   bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
3845   for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
3846     // Skip the mask, and the Glue.
3847     SDValue Op = N->getOperand(I);
3848     if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3849       continue;
3850     Ops.push_back(Op);
3851   }
3852 
3853   // Transitively apply any node glued to our new node.
3854   const auto *Glued = N->getGluedNode();
3855   if (auto *TGlued = Glued->getGluedNode())
3856     Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3857 
3858   MachineSDNode *Result =
3859       CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3860 
3861   if (!N->memoperands_empty())
3862     CurDAG->setNodeMemRefs(Result, N->memoperands());
3863 
3864   Result->setFlags(N->getFlags());
3865   ReplaceUses(N, Result);
3866 
3867   return true;
3868 }
3869 
3870 static bool IsVMerge(SDNode *N) {
3871   return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3872 }
3873 
3874 // Try to fold away VMERGE_VVM instructions into their true operands:
3875 //
3876 // %true = PseudoVADD_VV ...
3877 // %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3878 // ->
3879 // %x = PseudoVADD_VV_MASK %false, ..., %mask
3880 //
3881 // We can only fold if vmerge's passthru operand, vmerge's false operand and
3882 // %true's passthru operand (if it has one) are the same. This is because we
3883 // have to consolidate them into one passthru operand in the result.
3884 //
3885 // If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3886 // mask is all ones.
3887 //
3888 // The resulting VL is the minimum of the two VLs.
3889 //
3890 // The resulting policy is the effective policy the vmerge would have had,
3891 // i.e. whether or not its passthru operand was implicit-def.
3892 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3893   SDValue Passthru, False, True, VL, Mask, Glue;
3894   assert(IsVMerge(N));
3895   Passthru = N->getOperand(0);
3896   False = N->getOperand(1);
3897   True = N->getOperand(2);
3898   Mask = N->getOperand(3);
3899   VL = N->getOperand(4);
3900   // We always have a glue node for the mask at v0.
3901   Glue = N->getOperand(N->getNumOperands() - 1);
3902   assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3903   assert(Glue.getValueType() == MVT::Glue);
3904 
3905   // If the EEW of True is different from vmerge's SEW, then we can't fold.
3906   if (True.getSimpleValueType() != N->getSimpleValueType(0))
3907     return false;
3908 
3909   // We require that either passthru and false are the same, or that passthru
3910   // is undefined.
3911   if (Passthru != False && !isImplicitDef(Passthru))
3912     return false;
3913 
3914   assert(True.getResNo() == 0 &&
3915          "Expect True is the first output of an instruction.");
3916 
3917   // N must be the only user of True.
3918   if (!True.hasOneUse())
3919     return false;
3920 
3921   if (!True.isMachineOpcode())
3922     return false;
3923 
3924   unsigned TrueOpc = True.getMachineOpcode();
3925   const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3926   uint64_t TrueTSFlags = TrueMCID.TSFlags;
3927   bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3928 
3929   const RISCV::RISCVMaskedPseudoInfo *Info =
3930       RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3931   if (!Info)
3932     return false;
3933 
3934   // If True has a passthru operand then it needs to be the same as vmerge's
3935   // False, since False will be used for the result's passthru operand.
3936   if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3937     SDValue PassthruOpTrue = True->getOperand(0);
3938     if (False != PassthruOpTrue)
3939       return false;
3940   }
3941 
3942   // Skip if True has side effects.
3943   if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3944     return false;
3945 
3946   // The last operand of a masked instruction may be glued.
3947   bool HasGlueOp = True->getGluedNode() != nullptr;
3948 
3949   // The chain operand may exist either before the glued operands or in the last
3950   // position.
3951   unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3952   bool HasChainOp =
3953       True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3954 
3955   if (HasChainOp) {
3956     // Avoid creating cycles in the DAG. We must ensure that none of the other
3957     // operands depend on True through its Chain.
3958     SmallVector<const SDNode *, 4> LoopWorklist;
3959     SmallPtrSet<const SDNode *, 16> Visited;
3960     LoopWorklist.push_back(False.getNode());
3961     LoopWorklist.push_back(Mask.getNode());
3962     LoopWorklist.push_back(VL.getNode());
3963     LoopWorklist.push_back(Glue.getNode());
3964     if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3965       return false;
3966   }
3967 
3968   // The vector policy operand may be present for masked intrinsics.
3969   bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3970   unsigned TrueVLIndex =
3971       True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3972   SDValue TrueVL = True.getOperand(TrueVLIndex);
3973   SDValue SEW = True.getOperand(TrueVLIndex + 1);
3974 
3975   auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3976     if (LHS == RHS)
3977       return LHS;
3978     if (isAllOnesConstant(LHS))
3979       return RHS;
3980     if (isAllOnesConstant(RHS))
3981       return LHS;
3982     auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3983     auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3984     if (!CLHS || !CRHS)
3985       return SDValue();
3986     return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3987   };
3988 
3989   // Because N and True must have the same passthru operand (or True's operand
3990   // is implicit_def), the "effective" body is the minimum of their VLs.
3991   SDValue OrigVL = VL;
3992   VL = GetMinVL(TrueVL, VL);
3993   if (!VL)
3994     return false;
3995 
3996   // Some operations produce different elementwise results depending on the
3997   // active elements, like viota.m or vredsum. This transformation is illegal
3998   // for these if we change the active elements (i.e. mask or VL).
3999   const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
4000   if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
4001     return false;
4002   if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
4003       (Mask && !usesAllOnesMask(Mask, Glue)))
4004     return false;
4005 
4006   // Make sure it doesn't raise any observable fp exceptions, since changing the
4007   // active elements will affect how fflags is set.
4008   if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
4009     return false;
4010 
4011   SDLoc DL(N);
4012 
4013   unsigned MaskedOpc = Info->MaskedPseudo;
4014 #ifndef NDEBUG
4015   const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
4016   assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
4017          "Expected masked instruction to have a policy operand.");
4018   assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
4019                                          MCOI::TIED_TO) == 0 &&
4020          "Expected masked instruction to have a tied dest.");
4021 #endif
4022 
4023   // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
4024   // operand is undefined.
4025   //
4026   // However, if the VL became smaller than what the vmerge had originally, then
4027   // elements past VL that were previously in the vmerge's body will have moved
4028   // to the tail. In that case we always need to use tail undisturbed to
4029   // preserve them.
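  // E.g. if the vmerge had VL=8 but True had VL=4, the folded VL is 4 and
  // elements 4..7, which the vmerge previously produced in its body, become
  // tail elements; tail undisturbed keeps them intact.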
4030   bool MergeVLShrunk = VL != OrigVL;
4031   uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
4032                         ? RISCVII::TAIL_AGNOSTIC
4033                         : /*TUMU*/ 0;
4034   SDValue PolicyOp =
4035       CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
4036
4038   SmallVector<SDValue, 8> Ops;
4039   Ops.push_back(False);
4040 
4041   const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
4042   const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
4043   Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
4044 
4045   Ops.push_back(Mask);
4046 
4047   // For an unmasked "VOp" with a rounding-mode operand, i.e. one whose operand
4048   // list looks like (..., rm, vl) or (..., rm, vl, policy), the masked version
4049   // is (..., vm, rm, vl, policy). See the rounding-mode pseudo nodes in
4050   // RISCVInstrInfoVPseudos.td.
4051   if (HasRoundingMode)
4052     Ops.push_back(True->getOperand(TrueVLIndex - 1));
4053 
4054   Ops.append({VL, SEW, PolicyOp});
4055 
4056   // The result node should take over the chain operand of True.
4057   if (HasChainOp)
4058     Ops.push_back(True.getOperand(TrueChainOpIdx));
4059 
4060   // Add the glue for the CopyToReg of mask->v0.
4061   Ops.push_back(Glue);
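  // At this point, for e.g. a binary vop, Ops holds the masked pseudo's full
  // operand list:
  //   (false, op1, op2, mask[, rm], vl, sew, policy[, chain][, glue])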
4062 
4063   MachineSDNode *Result =
4064       CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
4065   Result->setFlags(True->getFlags());
4066 
4067   if (!cast<MachineSDNode>(True)->memoperands_empty())
4068     CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
4069 
4070   // Replace the vmerge.vvm node with Result.
4071   ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
4072 
4073   // Replace the other values of True, e.g. its chain and VL outputs.
4074   for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
4075     ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
4076 
4077   return true;
4078 }
4079 
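// Fold vmerge.vvm nodes into a masked form of their true operand where legal.
// As an illustrative sketch (operand lists simplified):
//
//   %true = PseudoVADD_VV %passthru, %a, %b, %vl1, sew
//   %n    = PseudoVMERGE_VVM %passthru, %false, %true, %mask, %vl2, sew
// ->
//   %n    = PseudoVADD_VV_MASK %false, %a, %b, %mask, min(%vl1, %vl2), sew,
//           policy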
4080 bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4081   bool MadeChange = false;
4082   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4083 
4084   while (Position != CurDAG->allnodes_begin()) {
4085     SDNode *N = &*--Position;
4086     if (N->use_empty() || !N->isMachineOpcode())
4087       continue;
4088 
4089     if (IsVMerge(N))
4090       MadeChange |= performCombineVMergeAndVOps(N);
4091   }
4092   return MadeChange;
4093 }
4094 
4095 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
4096 /// issues with MachineCSE not being able to CSE expressions with
4097 /// IMPLICIT_DEF operands while preserving the semantic intent. See
4098 /// pr64282 for context. Note that this transform is the last one
4099 /// performed during DAG-to-DAG ISel.
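/// As a sketch (operands simplified):
///   %pt = IMPLICIT_DEF
///   %v  = PseudoVADD_VV %pt, %a, %b, %vl, sew, policy
/// becomes
///   %v  = PseudoVADD_VV $noreg, %a, %b, %vl, sew, policy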
4100 bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4101   bool MadeChange = false;
4102   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4103 
4104   while (Position != CurDAG->allnodes_begin()) {
4105     SDNode *N = &*--Position;
4106     if (N->use_empty() || !N->isMachineOpcode())
4107       continue;
4108 
4109     const unsigned Opc = N->getMachineOpcode();
4110     if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4111         !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4112         !isImplicitDef(N->getOperand(0)))
4113       continue;
4114 
4115     SmallVector<SDValue> Ops;
4116     Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4117     Ops.append(N->op_begin() + 1, N->op_end());
4121 
4122     MachineSDNode *Result =
4123       CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4124     Result->setFlags(N->getFlags());
4125     CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4126     ReplaceUses(N, Result);
4127     MadeChange = true;
4128   }
4129   return MadeChange;
4130 }
4131
4133 // This pass converts a legalized DAG into a RISC-V-specific DAG, ready
4134 // for instruction scheduling.
4135 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4136                                        CodeGenOptLevel OptLevel) {
4137   return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4138 }
4139 
4140 char RISCVDAGToDAGISelLegacy::ID = 0;
4141 
4142 RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4143                                                  CodeGenOptLevel OptLevel)
4144     : SelectionDAGISelLegacy(
4145           ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4146 
4147 INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4148