1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation  -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVConstantPoolValue.h"
18 #include "RISCVMachineFunctionInfo.h"
19 #include "RISCVRegisterInfo.h"
20 #include "RISCVSelectionDAGInfo.h"
21 #include "RISCVSubtarget.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/MemoryLocation.h"
25 #include "llvm/Analysis/VectorUtils.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineJumpTableInfo.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
32 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
33 #include "llvm/CodeGen/ValueTypes.h"
34 #include "llvm/IR/DiagnosticInfo.h"
35 #include "llvm/IR/DiagnosticPrinter.h"
36 #include "llvm/IR/IRBuilder.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicsRISCV.h"
39 #include "llvm/IR/PatternMatch.h"
40 #include "llvm/MC/MCCodeEmitter.h"
41 #include "llvm/MC/MCInstBuilder.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/InstructionCost.h"
46 #include "llvm/Support/KnownBits.h"
47 #include "llvm/Support/MathExtras.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include <optional>
50 
51 using namespace llvm;
52 
53 #define DEBUG_TYPE "riscv-lower"
54 
55 STATISTIC(NumTailCalls, "Number of tail calls");
56 
57 static cl::opt<unsigned> ExtensionMaxWebSize(
58     DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59     cl::desc("Give the maximum size (in number of nodes) of the web of "
60              "instructions that we will consider for VW expansion"),
61     cl::init(18));
62 
63 static cl::opt<bool>
64     AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65                      cl::desc("Allow the formation of VW_W operations (e.g., "
66                               "VWADD_W) with splat constants"),
67                      cl::init(false));
68 
69 static cl::opt<unsigned> NumRepeatedDivisors(
70     DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71     cl::desc("Set the minimum number of repetitions of a divisor to allow "
72              "transformation to multiplications by the reciprocal"),
73     cl::init(2));
74 
75 static cl::opt<int>
76     FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
77               cl::desc("Give the maximum number of instructions that we will "
78                        "use for creating a floating-point immediate value"),
79               cl::init(2));
80 
81 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
82                                          const RISCVSubtarget &STI)
83     : TargetLowering(TM), Subtarget(STI) {
84 
85   RISCVABI::ABI ABI = Subtarget.getTargetABI();
86   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87 
88   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89       !Subtarget.hasStdExtF()) {
90     errs() << "Hard-float 'f' ABI can't be used for a target that "
91               "doesn't support the F instruction set extension (ignoring "
92               "target-abi)\n";
93     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
94   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95              !Subtarget.hasStdExtD()) {
96     errs() << "Hard-float 'd' ABI can't be used for a target that "
97               "doesn't support the D instruction set extension (ignoring "
98               "target-abi)\n";
99     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
100   }
101 
102   switch (ABI) {
103   default:
104     report_fatal_error("Don't know how to lower this ABI");
105   case RISCVABI::ABI_ILP32:
106   case RISCVABI::ABI_ILP32E:
107   case RISCVABI::ABI_LP64E:
108   case RISCVABI::ABI_ILP32F:
109   case RISCVABI::ABI_ILP32D:
110   case RISCVABI::ABI_LP64:
111   case RISCVABI::ABI_LP64F:
112   case RISCVABI::ABI_LP64D:
113     break;
114   }
115 
116   MVT XLenVT = Subtarget.getXLenVT();
117 
118   // Set up the register classes.
119   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120 
121   if (Subtarget.hasStdExtZfhmin())
122     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123   if (Subtarget.hasStdExtZfbfmin())
124     addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125   if (Subtarget.hasStdExtF())
126     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127   if (Subtarget.hasStdExtD())
128     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129   if (Subtarget.hasStdExtZhinxmin())
130     addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131   if (Subtarget.hasStdExtZfinx())
132     addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133   if (Subtarget.hasStdExtZdinx()) {
134     if (Subtarget.is64Bit())
135       addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136     else
137       addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138   }
139 
140   static const MVT::SimpleValueType BoolVecVTs[] = {
141       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
142       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143   static const MVT::SimpleValueType IntVecVTs[] = {
144       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
145       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
146       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148       MVT::nxv4i64, MVT::nxv8i64};
149   static const MVT::SimpleValueType F16VecVTs[] = {
150       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
151       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152   static const MVT::SimpleValueType BF16VecVTs[] = {
153       MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
154       MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155   static const MVT::SimpleValueType F32VecVTs[] = {
156       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157   static const MVT::SimpleValueType F64VecVTs[] = {
158       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159   static const MVT::SimpleValueType VecTupleVTs[] = {
160       MVT::riscv_nxv1i8x2,  MVT::riscv_nxv1i8x3,  MVT::riscv_nxv1i8x4,
161       MVT::riscv_nxv1i8x5,  MVT::riscv_nxv1i8x6,  MVT::riscv_nxv1i8x7,
162       MVT::riscv_nxv1i8x8,  MVT::riscv_nxv2i8x2,  MVT::riscv_nxv2i8x3,
163       MVT::riscv_nxv2i8x4,  MVT::riscv_nxv2i8x5,  MVT::riscv_nxv2i8x6,
164       MVT::riscv_nxv2i8x7,  MVT::riscv_nxv2i8x8,  MVT::riscv_nxv4i8x2,
165       MVT::riscv_nxv4i8x3,  MVT::riscv_nxv4i8x4,  MVT::riscv_nxv4i8x5,
166       MVT::riscv_nxv4i8x6,  MVT::riscv_nxv4i8x7,  MVT::riscv_nxv4i8x8,
167       MVT::riscv_nxv8i8x2,  MVT::riscv_nxv8i8x3,  MVT::riscv_nxv8i8x4,
168       MVT::riscv_nxv8i8x5,  MVT::riscv_nxv8i8x6,  MVT::riscv_nxv8i8x7,
169       MVT::riscv_nxv8i8x8,  MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170       MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171 
172   if (Subtarget.hasVInstructions()) {
173     auto addRegClassForRVV = [this](MVT VT) {
174       // Disable the smallest fractional LMUL types if ELEN is less than
175       // RVVBitsPerBlock.
176       unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177       if (VT.getVectorMinNumElements() < MinElts)
178         return;
179 
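      // Map the type's known-minimum size to the register class of the
      // corresponding LMUL: a single vector register (VR) for LMUL <= 1, and
      // the VRM2/VRM4/VRM8 register groups for LMUL 2/4/8.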
180       unsigned Size = VT.getSizeInBits().getKnownMinValue();
181       const TargetRegisterClass *RC;
182       if (Size <= RISCV::RVVBitsPerBlock)
183         RC = &RISCV::VRRegClass;
184       else if (Size == 2 * RISCV::RVVBitsPerBlock)
185         RC = &RISCV::VRM2RegClass;
186       else if (Size == 4 * RISCV::RVVBitsPerBlock)
187         RC = &RISCV::VRM4RegClass;
188       else if (Size == 8 * RISCV::RVVBitsPerBlock)
189         RC = &RISCV::VRM8RegClass;
190       else
191         llvm_unreachable("Unexpected size");
192 
193       addRegisterClass(VT, RC);
194     };
195 
196     for (MVT VT : BoolVecVTs)
197       addRegClassForRVV(VT);
198     for (MVT VT : IntVecVTs) {
199       if (VT.getVectorElementType() == MVT::i64 &&
200           !Subtarget.hasVInstructionsI64())
201         continue;
202       addRegClassForRVV(VT);
203     }
204 
205     if (Subtarget.hasVInstructionsF16Minimal())
206       for (MVT VT : F16VecVTs)
207         addRegClassForRVV(VT);
208 
209     if (Subtarget.hasVInstructionsBF16Minimal())
210       for (MVT VT : BF16VecVTs)
211         addRegClassForRVV(VT);
212 
213     if (Subtarget.hasVInstructionsF32())
214       for (MVT VT : F32VecVTs)
215         addRegClassForRVV(VT);
216 
217     if (Subtarget.hasVInstructionsF64())
218       for (MVT VT : F64VecVTs)
219         addRegClassForRVV(VT);
220 
221     if (Subtarget.useRVVForFixedLengthVectors()) {
222       auto addRegClassForFixedVectors = [this](MVT VT) {
223         MVT ContainerVT = getContainerForFixedLengthVector(VT);
224         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226         addRegisterClass(VT, TRI.getRegClass(RCID));
227       };
228       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
229         if (useRVVForFixedLengthVectorVT(VT))
230           addRegClassForFixedVectors(VT);
231 
232       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
233         if (useRVVForFixedLengthVectorVT(VT))
234           addRegClassForFixedVectors(VT);
235     }
236 
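    // Register classes for the vector tuple types listed above: a
    // VRN<NF>M<LMUL> class holds NF consecutive register groups of the given
    // LMUL (these tuple types are used, e.g., by segment load/store
    // intrinsics).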
237     addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238     addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239     addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240     addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241     addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242     addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243     addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244     addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245     addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246     addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247     addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248     addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249     addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250     addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251     addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252     addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253     addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254     addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255     addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256     addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257     addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258     addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259     addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260     addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261     addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262     addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263     addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264     addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265     addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266     addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267     addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268     addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269   }
270 
271   // Compute derived properties from the register classes.
272   computeRegisterProperties(STI.getRegisterInfo());
273 
274   setStackPointerRegisterToSaveRestore(RISCV::X2);
275 
276   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
277                    MVT::i1, Promote);
278   // DAGCombiner can call isLoadExtLegal for types that aren't legal.
279   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
280                    MVT::i1, Promote);
281 
282   // TODO: add all necessary setOperationAction calls.
283   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
284 
285   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
286   setOperationAction(ISD::BR_CC, XLenVT, Expand);
287   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
288   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
289 
290   setCondCodeAction(ISD::SETGT, XLenVT, Custom);
291   setCondCodeAction(ISD::SETGE, XLenVT, Expand);
292   setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
293   setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
294   if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
295     setCondCodeAction(ISD::SETULE, XLenVT, Expand);
296     setCondCodeAction(ISD::SETLE, XLenVT, Expand);
297   }
298 
299   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
300 
301   setOperationAction(ISD::VASTART, MVT::Other, Custom);
302   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
303 
304   if (!Subtarget.hasVendorXTHeadBb())
305     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
306 
307   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
308 
309   if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310       !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311     setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312 
313   if (Subtarget.is64Bit()) {
314     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
315 
316     setOperationAction(ISD::LOAD, MVT::i32, Custom);
317     setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
318                        MVT::i32, Custom);
319     setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Custom);
320     if (!Subtarget.hasStdExtZbb())
321       setOperationAction(
322           {ISD::SADDSAT, ISD::SSUBSAT, ISD::UADDSAT, ISD::USUBSAT}, MVT::i32,
323           Custom);
324     setOperationAction(ISD::SADDO, MVT::i32, Custom);
325   }
326   if (!Subtarget.hasStdExtZmmul()) {
327     setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
328   } else if (Subtarget.is64Bit()) {
329     setOperationAction(ISD::MUL, MVT::i128, Custom);
330     setOperationAction(ISD::MUL, MVT::i32, Custom);
331   } else {
332     setOperationAction(ISD::MUL, MVT::i64, Custom);
333   }
334 
335   if (!Subtarget.hasStdExtM()) {
336     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, XLenVT,
337                        Expand);
338   } else if (Subtarget.is64Bit()) {
339     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
340                        {MVT::i8, MVT::i16, MVT::i32}, Custom);
341   }
342 
343   setOperationAction(
344       {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
345       Expand);
346 
347   setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
348                      Custom);
349 
350   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351     if (Subtarget.is64Bit())
352       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
353   } else if (Subtarget.hasVendorXTHeadBb()) {
354     if (Subtarget.is64Bit())
355       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
356     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
357   } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
358     setOperationAction(ISD::ROTL, XLenVT, Expand);
359   } else {
360     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
361   }
362 
363   // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364   // pattern-match it directly in isel.
365   setOperationAction(ISD::BSWAP, XLenVT,
366                      (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367                       Subtarget.hasVendorXTHeadBb())
368                          ? Legal
369                          : Expand);
370 
371   if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
372     setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
373   } else {
374     // Zbkb can use rev8+brev8 to implement bitreverse.
375     setOperationAction(ISD::BITREVERSE, XLenVT,
376                        Subtarget.hasStdExtZbkb() ? Custom : Expand);
377   }
378 
379   if (Subtarget.hasStdExtZbb() ||
380       (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
381     setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
382                        Legal);
383   }
384 
385   if (Subtarget.hasStdExtZbb() ||
386       (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387     if (Subtarget.is64Bit())
388       setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
389   } else {
390     setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
391   }
392 
393   if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394       (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395     // We need the custom lowering to make sure that the resulting sequence
396     // for the 32-bit case is efficient on 64-bit targets.
397     if (Subtarget.is64Bit())
398       setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
399   } else {
400     setOperationAction(ISD::CTLZ, XLenVT, Expand);
401   }
402 
403   if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
404     setOperationAction(ISD::ABS, XLenVT, Legal);
405   } else if (Subtarget.hasShortForwardBranchOpt()) {
406     // We can use PseudoCCSUB to implement ABS.
407     setOperationAction(ISD::ABS, XLenVT, Legal);
408   } else if (Subtarget.is64Bit()) {
409     setOperationAction(ISD::ABS, MVT::i32, Custom);
410   }
411 
412   if (Subtarget.useCCMovInsn())
413     setOperationAction(ISD::SELECT, XLenVT, Legal);
414   else if (!Subtarget.hasVendorXTHeadCondMov())
415     setOperationAction(ISD::SELECT, XLenVT, Custom);
416 
417   static const unsigned FPLegalNodeTypes[] = {
418       ISD::FMINNUM,       ISD::FMAXNUM,        ISD::FMINIMUMNUM,
419       ISD::FMAXIMUMNUM,   ISD::LRINT,          ISD::LLRINT,
420       ISD::LROUND,        ISD::LLROUND,        ISD::STRICT_LRINT,
421       ISD::STRICT_LLRINT, ISD::STRICT_LROUND,  ISD::STRICT_LLROUND,
422       ISD::STRICT_FMA,    ISD::STRICT_FADD,    ISD::STRICT_FSUB,
423       ISD::STRICT_FMUL,   ISD::STRICT_FDIV,    ISD::STRICT_FSQRT,
424       ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::FCANONICALIZE};
425 
426   static const ISD::CondCode FPCCToExpand[] = {
427       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
428       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
429       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
430 
431   static const unsigned FPOpToExpand[] = {
432       ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
433       ISD::FREM};
434 
435   static const unsigned FPRndMode[] = {
436       ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
437       ISD::FROUNDEVEN};
438 
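  // Operations on scalar f16/bf16 that are promoted to f32 when only the
  // Zfhmin/Zfbfmin subset (loads, stores, moves and conversions) is available.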
439   static const unsigned ZfhminZfbfminPromoteOps[] = {
440       ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FMAXIMUMNUM,
441       ISD::FMINIMUMNUM,  ISD::FADD,          ISD::FSUB,
442       ISD::FMUL,         ISD::FMA,           ISD::FDIV,
443       ISD::FSQRT,        ISD::STRICT_FMA,    ISD::STRICT_FADD,
444       ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
445       ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
446       ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
447       ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
448       ISD::FROUNDEVEN,   ISD::FCANONICALIZE};
449 
450   if (Subtarget.hasStdExtZfbfmin()) {
451     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
452     setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
453     setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
454     setOperationAction(ISD::SELECT, MVT::bf16, Custom);
455     setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
456     setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
457     setOperationAction(ISD::FREM, MVT::bf16, Promote);
458     setOperationAction(ISD::FABS, MVT::bf16, Custom);
459     setOperationAction(ISD::FNEG, MVT::bf16, Custom);
460     setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Custom);
461     setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, XLenVT, Custom);
462     setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, XLenVT, Custom);
463   }
464 
465   if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
466     if (Subtarget.hasStdExtZfhOrZhinx()) {
467       setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
468       setOperationAction(FPRndMode, MVT::f16,
469                          Subtarget.hasStdExtZfa() ? Legal : Custom);
470       setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
471       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
472                          Subtarget.hasStdExtZfa() ? Legal : Custom);
473       if (Subtarget.hasStdExtZfa())
474         setOperationAction(ISD::ConstantFP, MVT::f16, Custom);
475     } else {
476       setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
477       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
478       for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
479                       ISD::STRICT_LROUND, ISD::STRICT_LLROUND,
480                       ISD::STRICT_LRINT, ISD::STRICT_LLRINT})
481         setOperationAction(Op, MVT::f16, Custom);
482       setOperationAction(ISD::FABS, MVT::f16, Custom);
483       setOperationAction(ISD::FNEG, MVT::f16, Custom);
484       setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
485       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, XLenVT, Custom);
486       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, XLenVT, Custom);
487     }
488 
489     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
490 
491     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
492     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
493     setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
494     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
495     setOperationAction(ISD::SELECT, MVT::f16, Custom);
496     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
497 
498     setOperationAction(
499         ISD::FNEARBYINT, MVT::f16,
500         Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
501     setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
502                         ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
503                         ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
504                         ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
505                        MVT::f16, Promote);
506 
507     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
508     // complete support for all operations in LegalizeDAG.
509     setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
510                         ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
511                         ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
512                         ISD::STRICT_FTRUNC, ISD::STRICT_FLDEXP},
513                        MVT::f16, Promote);
514 
515     // We need to custom promote this.
516     if (Subtarget.is64Bit())
517       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
518   }
519 
520   if (Subtarget.hasStdExtFOrZfinx()) {
521     setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
522     setOperationAction(FPRndMode, MVT::f32,
523                        Subtarget.hasStdExtZfa() ? Legal : Custom);
524     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
525     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
526     setOperationAction(ISD::SELECT, MVT::f32, Custom);
527     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
528     setOperationAction(FPOpToExpand, MVT::f32, Expand);
529     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
530     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
531     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
532     setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
533     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
534     setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
535     setOperationAction(ISD::FP_TO_BF16, MVT::f32,
536                        Subtarget.isSoftFPABI() ? LibCall : Custom);
537     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
538     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
539     setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
540     setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
541 
542     if (Subtarget.hasStdExtZfa()) {
543       setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
544       setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
545       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
546     } else {
547       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
548     }
549   }
550 
551   if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
552     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
553 
554   if (Subtarget.hasStdExtDOrZdinx()) {
555     setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
556 
557     if (!Subtarget.is64Bit())
558       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
559 
560     if (Subtarget.hasStdExtZfa()) {
561       setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
562       setOperationAction(FPRndMode, MVT::f64, Legal);
563       setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
564       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
565     } else {
566       if (Subtarget.is64Bit())
567         setOperationAction(FPRndMode, MVT::f64, Custom);
568 
569       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
570     }
571 
572     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
573     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
574     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
575     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
576     setOperationAction(ISD::SELECT, MVT::f64, Custom);
577     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
578     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
579     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
580     setOperationAction(FPOpToExpand, MVT::f64, Expand);
581     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
582     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
583     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
584     setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
585     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
586     setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
587     setOperationAction(ISD::FP_TO_BF16, MVT::f64,
588                        Subtarget.isSoftFPABI() ? LibCall : Custom);
589     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
590     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
591     setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
592     setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
593   }
594 
595   if (Subtarget.is64Bit()) {
596     setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
597                         ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
598                        MVT::i32, Custom);
599     setOperationAction(ISD::LROUND, MVT::i32, Custom);
600   }
601 
602   if (Subtarget.hasStdExtFOrZfinx()) {
603     setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
604                        Custom);
605 
606     // f16/bf16 require custom handling.
607     setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, XLenVT,
608                        Custom);
609     setOperationAction({ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, XLenVT,
610                        Custom);
611 
612     setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
613     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
614   }
615 
616   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
617                       ISD::JumpTable},
618                      XLenVT, Custom);
619 
620   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
621 
622   if (Subtarget.is64Bit())
623     setOperationAction(ISD::Constant, MVT::i64, Custom);
624 
625   // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
626   // Unfortunately this can't be determined just from the ISA naming string.
627   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
628                      Subtarget.is64Bit() ? Legal : Custom);
629   setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
630                      Subtarget.is64Bit() ? Legal : Custom);
631 
632   if (Subtarget.is64Bit()) {
633     setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
634     setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
635   }
636 
637   setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
638   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
639   if (Subtarget.is64Bit())
640     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
641 
642   if (Subtarget.hasStdExtZicbop()) {
643     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
644   }
645 
646   if (Subtarget.hasStdExtA()) {
647     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
648     if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
649       setMinCmpXchgSizeInBits(8);
650     else
651       setMinCmpXchgSizeInBits(32);
652   } else if (Subtarget.hasForcedAtomics()) {
653     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
654   } else {
655     setMaxAtomicSizeInBitsSupported(0);
656   }
657 
658   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
659 
660   setBooleanContents(ZeroOrOneBooleanContent);
661 
662   if (getTargetMachine().getTargetTriple().isOSLinux()) {
663     // Custom lowering of llvm.clear_cache.
664     setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);
665   }
666 
667   if (Subtarget.hasVInstructions()) {
668     setBooleanVectorContents(ZeroOrOneBooleanContent);
669 
670     setOperationAction(ISD::VSCALE, XLenVT, Custom);
671 
672     // RVV intrinsics may have illegal operands.
673     // We also need to custom legalize vmv.x.s.
674     setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
675                         ISD::INTRINSIC_VOID},
676                        {MVT::i8, MVT::i16}, Custom);
677     if (Subtarget.is64Bit())
678       setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
679                          MVT::i32, Custom);
680     else
681       setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
682                          MVT::i64, Custom);
683 
684     setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
685                        MVT::Other, Custom);
686 
687     static const unsigned IntegerVPOps[] = {
688         ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
689         ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
690         ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
691         ISD::VP_XOR,         ISD::VP_SRA,         ISD::VP_SRL,
692         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
693         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
694         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695         ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
696         ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
697         ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
698         ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
699         ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700         ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
701         ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
702         ISD::EXPERIMENTAL_VP_SPLAT};
703 
704     static const unsigned FloatingPointVPOps[] = {
705         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
706         ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
707         ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
708         ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
709         ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
710         ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
711         ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
712         ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
713         ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
714         ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
715         ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
716         ISD::VP_LLRINT,      ISD::EXPERIMENTAL_VP_REVERSE,
717         ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
718         ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
719 
720     static const unsigned IntegerVecReduceOps[] = {
721         ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
722         ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
723         ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
724 
725     static const unsigned FloatingPointVecReduceOps[] = {
726         ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
727         ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
728 
729     static const unsigned FloatingPointLibCallOps[] = {
730         ISD::FREM,  ISD::FPOW,   ISD::FCOS, ISD::FSIN,  ISD::FSINCOS, ISD::FEXP,
731         ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
732 
733     if (!Subtarget.is64Bit()) {
734       // We must custom-lower certain vXi64 operations on RV32 due to the vector
735       // element type being illegal.
736       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
737                          MVT::i64, Custom);
738 
739       setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
740 
741       setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
742                           ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
743                           ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
744                           ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
745                          MVT::i64, Custom);
746     }
747 
748     for (MVT VT : BoolVecVTs) {
749       if (!isTypeLegal(VT))
750         continue;
751 
752       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
753 
754       // Mask VTs are custom-expanded into a series of standard nodes
755       setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
756                           ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
757                           ISD::SCALAR_TO_VECTOR},
758                          VT, Custom);
759 
760       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
761                          Custom);
762 
763       setOperationAction(ISD::SELECT, VT, Custom);
764       setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
765                          Expand);
766       setOperationAction(ISD::VP_MERGE, VT, Custom);
767 
768       setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
769                          Custom);
770 
771       setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
772 
773       setOperationAction(
774           {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
775           Custom);
776 
777       setOperationAction(
778           {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
779           Custom);
780 
781       // RVV has native int->float & float->int conversions where the
782       // element type sizes are within one power-of-two of each other. Any
783       // wider distances between type sizes have to be lowered as sequences
784       // which progressively narrow the gap in stages.
785       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
786                           ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
787                           ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
788                           ISD::STRICT_FP_TO_UINT},
789                          VT, Custom);
790       setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
791                          Custom);
792 
793       // Expand all extending loads to types larger than this, and truncating
794       // stores from types larger than this.
795       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
796         setTruncStoreAction(VT, OtherVT, Expand);
797         setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
798                          OtherVT, Expand);
799       }
800 
801       setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
802                           ISD::VP_TRUNCATE, ISD::VP_SETCC},
803                          VT, Custom);
804 
805       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
806       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
807 
808       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
809 
810       setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
811       setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
812 
813       setOperationPromotedToType(
814           ISD::VECTOR_SPLICE, VT,
815           MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
816     }
817 
818     for (MVT VT : IntVecVTs) {
819       if (!isTypeLegal(VT))
820         continue;
821 
822       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
823       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
824 
825       // Vectors implement MULHS/MULHU.
826       setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
827 
828       // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
829       if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
830         setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
831 
832       setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
833                          Legal);
834 
835       setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
836 
837       // Custom-lower extensions and truncations from/to mask types.
838       setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
839                          VT, Custom);
840 
841       // RVV has native int->float & float->int conversions where the
842       // element type sizes are within one power-of-two of each other. Any
843       // wider distances between type sizes have to be lowered as sequences
844       // which progressively narrow the gap in stages.
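      // (For example, an i8 -> f64 conversion is not emitted directly; it is
      // staged through intermediate types so that each step changes the
      // element size by at most one power of two.)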
845       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
846                           ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
847                           ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
848                           ISD::STRICT_FP_TO_UINT},
849                          VT, Custom);
850       setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
851                          Custom);
852       setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,
853                           ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,
854                           ISD::SSUBSAT, ISD::USUBSAT},
855                          VT, Legal);
856 
857       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
858       // nodes which truncate by one power of two at a time.
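      // (For example, truncating i64 elements down to i8 becomes the chain
      // i64 -> i32 -> i16 -> i8.)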
859       setOperationAction(
860           {ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
861           Custom);
862 
863       // Custom-lower insert/extract operations to simplify patterns.
864       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
865                          Custom);
866 
867       // Custom-lower reduction operations to set up the corresponding custom
868       // nodes' operands.
869       setOperationAction(IntegerVecReduceOps, VT, Custom);
870 
871       setOperationAction(IntegerVPOps, VT, Custom);
872 
873       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
874 
875       setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
876                          VT, Custom);
877 
878       setOperationAction(
879           {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
880            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
881           VT, Custom);
882 
883       setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
884                           ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
885                          VT, Custom);
886 
887       setOperationAction(ISD::SELECT, VT, Custom);
888       setOperationAction(ISD::SELECT_CC, VT, Expand);
889 
890       setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
891 
892       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
893         setTruncStoreAction(VT, OtherVT, Expand);
894         setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
895                          OtherVT, Expand);
896       }
897 
898       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
899       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
900 
901       // Splice
902       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
903 
904       if (Subtarget.hasStdExtZvkb()) {
905         setOperationAction(ISD::BSWAP, VT, Legal);
906         setOperationAction(ISD::VP_BSWAP, VT, Custom);
907       } else {
908         setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
909         setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
910       }
911 
912       if (Subtarget.hasStdExtZvbb()) {
913         setOperationAction(ISD::BITREVERSE, VT, Legal);
914         setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
915         setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
916                             ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
917                            VT, Custom);
918       } else {
919         setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
920         setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
921         setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
922                             ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
923                            VT, Expand);
924 
925         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of
926         // VT is in the range of f32.
927         EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
928         if (isTypeLegal(FloatVT)) {
929           setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
930                               ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
931                               ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
932                              VT, Custom);
933         }
934       }
935 
936       setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
937     }
938 
939     for (MVT VT : VecTupleVTs) {
940       if (!isTypeLegal(VT))
941         continue;
942 
943       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
944     }
945 
946     // Expand various CCs to best match the RVV ISA, which natively supports UNE
947     // but no other unordered comparisons, and supports all ordered comparisons
948     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
949     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
950     // and we pattern-match those back to the "original", swapping operands once
951     // more. This way we catch both operations and both "vf" and "fv" forms with
952     // fewer patterns.
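    // (For example, SETGT is expanded here to SETLT with the operands swapped,
    // and instruction selection later matches that back to the greater-than
    // form of the instruction.)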
953     static const ISD::CondCode VFPCCToExpand[] = {
954         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
955         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
956         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
957     };
958 
959     // TODO: support more ops.
960     static const unsigned ZvfhminZvfbfminPromoteOps[] = {
961         ISD::FMINNUM,     ISD::FMAXNUM,     ISD::FADD,        ISD::FSUB,
962         ISD::FMUL,        ISD::FMA,         ISD::FDIV,        ISD::FSQRT,
963         ISD::FCEIL,       ISD::FTRUNC,      ISD::FFLOOR,      ISD::FROUND,
964         ISD::FROUNDEVEN,  ISD::FRINT,       ISD::FNEARBYINT,  ISD::IS_FPCLASS,
965         ISD::SETCC,       ISD::FMAXIMUM,    ISD::FMINIMUM,    ISD::STRICT_FADD,
966         ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
967         ISD::STRICT_FMA};
968 
969     // TODO: support more vp ops.
970     static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
971         ISD::VP_FADD,
972         ISD::VP_FSUB,
973         ISD::VP_FMUL,
974         ISD::VP_FDIV,
975         ISD::VP_FMA,
976         ISD::VP_REDUCE_FMIN,
977         ISD::VP_REDUCE_FMAX,
978         ISD::VP_SQRT,
979         ISD::VP_FMINNUM,
980         ISD::VP_FMAXNUM,
981         ISD::VP_FCEIL,
982         ISD::VP_FFLOOR,
983         ISD::VP_FROUND,
984         ISD::VP_FROUNDEVEN,
985         ISD::VP_FROUNDTOZERO,
986         ISD::VP_FRINT,
987         ISD::VP_FNEARBYINT,
988         ISD::VP_SETCC,
989         ISD::VP_FMINIMUM,
990         ISD::VP_FMAXIMUM,
991         ISD::VP_REDUCE_FMINIMUM,
992         ISD::VP_REDUCE_FMAXIMUM};
993 
994     // Sets common operation actions on RVV floating-point vector types.
995     const auto SetCommonVFPActions = [&](MVT VT) {
996       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
997       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
998       // sizes are within one power-of-two of each other. Therefore conversions
999       // between vXf16 and vXf64 must be lowered as sequences which convert via
1000       // vXf32.
1001       setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1002       setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1003       // Custom-lower insert/extract operations to simplify patterns.
1004       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
1005                          Custom);
1006       // Expand various condition codes (explained above).
1007       setCondCodeAction(VFPCCToExpand, VT, Expand);
1008 
1009       setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
1010       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1011 
1012       setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1013                           ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1014                           ISD::IS_FPCLASS},
1015                          VT, Custom);
1016 
1017       setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1018 
1019       // Expand FP operations that need libcalls.
1020       setOperationAction(FloatingPointLibCallOps, VT, Expand);
1021 
1022       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
1023 
1024       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1025 
1026       setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1027                          VT, Custom);
1028 
1029       setOperationAction(
1030           {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1031            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1032           VT, Custom);
1033 
1034       setOperationAction(ISD::SELECT, VT, Custom);
1035       setOperationAction(ISD::SELECT_CC, VT, Expand);
1036 
1037       setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1038                           ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
1039                          VT, Custom);
1040 
1041       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
1042       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
1043 
1044       setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
1045 
1046       setOperationAction(FloatingPointVPOps, VT, Custom);
1047 
1048       setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1049                          Custom);
1050       setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1051                           ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
1052                          VT, Legal);
1053       setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
1054                           ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
1055                           ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1056                           ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1057                          VT, Custom);
1058 
1059       setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
1060     };
1061 
1062     // Sets common extload/truncstore actions on RVV floating-point vector
1063     // types.
1064     const auto SetCommonVFPExtLoadTruncStoreActions =
1065         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1066           for (auto SmallVT : SmallerVTs) {
1067             setTruncStoreAction(VT, SmallVT, Expand);
1068             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1069           }
1070         };
1071 
1072     // Sets common actions for f16 and bf16 for when there's only
1073     // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1074     const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075       setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1076       setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1077                          Custom);
1078       setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1079       setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1080                          Custom);
1081       setOperationAction(ISD::SELECT_CC, VT, Expand);
1082       setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1083       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
1084                           ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
1085                           ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
1086                           ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
1087                           ISD::VECTOR_COMPRESS},
1088                          VT, Custom);
1089       MVT EltVT = VT.getVectorElementType();
1090       if (isTypeLegal(EltVT))
1091         setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1092                             ISD::EXTRACT_VECTOR_ELT},
1093                            VT, Custom);
1094       else
1095         setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1096                            EltVT, Custom);
1097       setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1098                           ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1099                           ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1100                           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1101                           ISD::VP_SCATTER},
1102                          VT, Custom);
1103 
1104       setOperationAction(ISD::FNEG, VT, Expand);
1105       setOperationAction(ISD::FABS, VT, Expand);
1106       setOperationAction(ISD::FCOPYSIGN, VT, Expand);
1107 
1108       // Expand FP operations that need libcalls.
1109       setOperationAction(FloatingPointLibCallOps, VT, Expand);
1110 
1111       // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1112       if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1113         setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1114         setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1115       } else {
1116         MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1117         setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1118         setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1119       }
1120     };
1121 
1122     if (Subtarget.hasVInstructionsF16()) {
1123       for (MVT VT : F16VecVTs) {
1124         if (!isTypeLegal(VT))
1125           continue;
1126         SetCommonVFPActions(VT);
1127       }
1128     } else if (Subtarget.hasVInstructionsF16Minimal()) {
1129       for (MVT VT : F16VecVTs) {
1130         if (!isTypeLegal(VT))
1131           continue;
1132         SetCommonPromoteToF32Actions(VT);
1133       }
1134     }
1135 
1136     if (Subtarget.hasVInstructionsBF16Minimal()) {
1137       for (MVT VT : BF16VecVTs) {
1138         if (!isTypeLegal(VT))
1139           continue;
1140         SetCommonPromoteToF32Actions(VT);
1141       }
1142     }
1143 
1144     if (Subtarget.hasVInstructionsF32()) {
1145       for (MVT VT : F32VecVTs) {
1146         if (!isTypeLegal(VT))
1147           continue;
1148         SetCommonVFPActions(VT);
1149         SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1150         SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1151       }
1152     }
1153 
1154     if (Subtarget.hasVInstructionsF64()) {
1155       for (MVT VT : F64VecVTs) {
1156         if (!isTypeLegal(VT))
1157           continue;
1158         SetCommonVFPActions(VT);
1159         SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1160         SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1161         SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1162       }
1163     }
1164 
1165     if (Subtarget.useRVVForFixedLengthVectors()) {
1166       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1167         if (!useRVVForFixedLengthVectorVT(VT))
1168           continue;
1169 
1170         // By default everything must be expanded.
1171         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172           setOperationAction(Op, VT, Expand);
1173         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1174           setTruncStoreAction(VT, OtherVT, Expand);
1175           setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
1176                            OtherVT, Expand);
1177         }
1178 
1179         // Custom lower fixed vector undefs to scalable vector undefs to avoid
1180         // expansion to a build_vector of 0s.
1181         setOperationAction(ISD::UNDEF, VT, Custom);
1182 
1183         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1184         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1185                            Custom);
1186 
1187         setOperationAction(
1188             {ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE}, VT,
1189             Custom);
1190 
1191         setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1192                            VT, Custom);
1193 
1194         setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1195 
1196         setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1197 
1198         setOperationAction(ISD::SETCC, VT, Custom);
1199 
1200         setOperationAction(ISD::SELECT, VT, Custom);
1201 
1202         setOperationAction(
1203             {ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
1204             Custom);
1205 
1206         setOperationAction(ISD::BITCAST, VT, Custom);
1207 
1208         setOperationAction(
1209             {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1210             Custom);
1211 
1212         setOperationAction(
1213             {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1214             Custom);
1215 
1216         setOperationAction(
1217             {
1218                 ISD::SINT_TO_FP,
1219                 ISD::UINT_TO_FP,
1220                 ISD::FP_TO_SINT,
1221                 ISD::FP_TO_UINT,
1222                 ISD::STRICT_SINT_TO_FP,
1223                 ISD::STRICT_UINT_TO_FP,
1224                 ISD::STRICT_FP_TO_SINT,
1225                 ISD::STRICT_FP_TO_UINT,
1226             },
1227             VT, Custom);
1228         setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
1229                            Custom);
1230 
1231         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1232 
1233         // Operations below are different between mask vectors and other vectors.
1234         if (VT.getVectorElementType() == MVT::i1) {
1235           setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1236                               ISD::OR, ISD::XOR},
1237                              VT, Custom);
1238 
1239           setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1240                               ISD::VP_SETCC, ISD::VP_TRUNCATE},
1241                              VT, Custom);
1242 
1243           setOperationAction(ISD::VP_MERGE, VT, Custom);
1244 
1245           setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1246           setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1247           continue;
1248         }
1249 
1250         // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1251         // it before type legalization for i64 vectors on RV32. It will then be
1252         // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1253         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1254         // improvements first.
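             // (On RV32, SPLAT_VECTOR_PARTS takes the low and high i32 halves of
             // the i64 splat value as separate operands.)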
1255         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256           setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1257           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1258         }
1259 
1260         setOperationAction(
1261             {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1262 
1263         setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1264                             ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1265                             ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1266                             ISD::VP_SCATTER},
1267                            VT, Custom);
1268 
1269         setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1270                             ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1271                             ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1272                            VT, Custom);
1273 
1274         setOperationAction(
1275             {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1276 
1277         setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
1278 
1279         // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1280         if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1281           setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1282 
1283         setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,
1284                             ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,
1285                             ISD::SSUBSAT, ISD::USUBSAT},
1286                            VT, Custom);
1287 
1288         setOperationAction(ISD::VSELECT, VT, Custom);
1289 
1290         setOperationAction(
1291             {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1292 
1293         // Custom-lower reduction operations to set up the corresponding custom
1294         // nodes' operands.
1295         setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1296                             ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1297                             ISD::VECREDUCE_UMIN},
1298                            VT, Custom);
1299 
1300         setOperationAction(IntegerVPOps, VT, Custom);
1301 
1302         if (Subtarget.hasStdExtZvkb())
1303           setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);
1304 
1305         if (Subtarget.hasStdExtZvbb()) {
1306           setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1307                               ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1308                              VT, Custom);
1309         } else {
1310           // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
1311           // is in the range of f32.
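               // (The custom lowering converts the elements to f32 and computes the
               // count from the exponent of the converted value.)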
1312           EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1313           if (isTypeLegal(FloatVT))
1314             setOperationAction(
1315                 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1316                 Custom);
1317         }
1318 
1319         setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
1320       }
1321 
1322       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1323         // There are no extending loads or truncating stores.
1324         for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1325           setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1326           setTruncStoreAction(VT, InnerVT, Expand);
1327         }
1328 
1329         if (!useRVVForFixedLengthVectorVT(VT))
1330           continue;
1331 
1332         // By default everything must be expanded.
1333         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1334           setOperationAction(Op, VT, Expand);
1335 
1336         // Custom lower fixed vector undefs to scalable vector undefs to avoid
1337         // expansion to a build_vector of 0s.
1338         setOperationAction(ISD::UNDEF, VT, Custom);
1339 
1340         setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
1341                             ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1342                             ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
1343                             ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
1344                            VT, Custom);
1345 
1346         setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1347                             ISD::MGATHER, ISD::MSCATTER},
1348                            VT, Custom);
1349         setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1350                             ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1351                             ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1352                            VT, Custom);
1353 
1354         setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1355         setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1356                            Custom);
1357 
1358         if (VT.getVectorElementType() == MVT::f16 &&
1359             !Subtarget.hasVInstructionsF16()) {
1360           setOperationAction(ISD::BITCAST, VT, Custom);
1361           setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1362           setOperationAction(
1363               {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1364               Custom);
1365           setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1366                              Custom);
1367           if (Subtarget.hasStdExtZfhmin()) {
1368             setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1369           } else {
1370             // We need to custom legalize f16 build vectors if Zfhmin isn't
1371             // available.
1372             setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
1373           }
1374           setOperationAction(ISD::FNEG, VT, Expand);
1375           setOperationAction(ISD::FABS, VT, Expand);
1376           setOperationAction(ISD::FCOPYSIGN, VT, Expand);
1377           MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1378           // Don't promote f16 vector operations to f32 if the f32 vector type is
1379           // not legal.
1380           // TODO: could split the f16 vector into two vectors and do promotion.
1381           if (!isTypeLegal(F32VecVT))
1382             continue;
1383           setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1384           setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1385           continue;
1386         }
1387 
1388         if (VT.getVectorElementType() == MVT::bf16) {
1389           setOperationAction(ISD::BITCAST, VT, Custom);
1390           setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1391           if (Subtarget.hasStdExtZfbfmin()) {
1392             setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1393           } else {
1394             // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1395             // available.
1396             setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
1397           }
1398           setOperationAction(
1399               {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1400               Custom);
1401           MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1402           // Don't promote bf16 vector operations to f32 if the f32 vector type is
1403           // not legal.
1404           // TODO: could split the bf16 vector into two vectors and do promotion.
1405           if (!isTypeLegal(F32VecVT))
1406             continue;
1407           setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1408           // TODO: Promote VP ops to fp32.
1409           continue;
1410         }
1411 
1412         setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
1413                            Custom);
1414 
1415         setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1416                             ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1417                             ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1418                             ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1419                            VT, Custom);
1420 
1421         setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1422                             ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1423                            VT, Custom);
1424 
1425         setCondCodeAction(VFPCCToExpand, VT, Expand);
1426 
1427         setOperationAction(ISD::SETCC, VT, Custom);
1428         setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1429 
1430         setOperationAction(ISD::BITCAST, VT, Custom);
1431 
1432         setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1433 
1434         setOperationAction(FloatingPointVPOps, VT, Custom);
1435 
1436         setOperationAction(
1437             {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1438              ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1439              ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1440              ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1441              ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1442             VT, Custom);
1443       }
1444 
1445       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1446       setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1447       if (Subtarget.is64Bit())
1448         setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1449       if (Subtarget.hasStdExtZfhminOrZhinxmin())
1450         setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1451       if (Subtarget.hasStdExtZfbfmin())
1452         setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1453       if (Subtarget.hasStdExtFOrZfinx())
1454         setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1455       if (Subtarget.hasStdExtDOrZdinx())
1456         setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1457     }
1458   }
1459 
1460   if (Subtarget.hasStdExtA())
1461     setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1462 
1463   if (Subtarget.hasForcedAtomics()) {
1464     // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1465     setOperationAction(
1466         {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1467          ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1468          ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1469          ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1470         XLenVT, LibCall);
1471   }
1472 
1473   if (Subtarget.hasVendorXTHeadMemIdx()) {
1474     for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1475       setIndexedLoadAction(im, MVT::i8, Legal);
1476       setIndexedStoreAction(im, MVT::i8, Legal);
1477       setIndexedLoadAction(im, MVT::i16, Legal);
1478       setIndexedStoreAction(im, MVT::i16, Legal);
1479       setIndexedLoadAction(im, MVT::i32, Legal);
1480       setIndexedStoreAction(im, MVT::i32, Legal);
1481 
1482       if (Subtarget.is64Bit()) {
1483         setIndexedLoadAction(im, MVT::i64, Legal);
1484         setIndexedStoreAction(im, MVT::i64, Legal);
1485       }
1486     }
1487   }
1488 
1489   if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1490     setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
1491     setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
1492     setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
1493 
1494     setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
1495     setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
1496     setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
1497   }
1498 
1499   // Function alignments.
1500   const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1501   setMinFunctionAlignment(FunctionAlignment);
1502   // Set preferred alignments.
1503   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1504   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1505 
1506   setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1507                        ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
1508                        ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1509   setTargetDAGCombine(ISD::SRA);
1510   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1511 
1512   if (Subtarget.hasStdExtFOrZfinx())
1513     setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1514 
1515   if (Subtarget.hasStdExtZbb())
1516     setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1517 
1518   if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1519       Subtarget.hasVInstructions())
1520     setTargetDAGCombine(ISD::TRUNCATE);
1521 
1522   if (Subtarget.hasStdExtZbkb())
1523     setTargetDAGCombine(ISD::BITREVERSE);
1524 
1525   if (Subtarget.hasStdExtFOrZfinx())
1526     setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1527                          ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1528   if (Subtarget.hasVInstructions())
1529     setTargetDAGCombine({ISD::FCOPYSIGN,     ISD::MGATHER,
1530                          ISD::MSCATTER,      ISD::VP_GATHER,
1531                          ISD::VP_SCATTER,    ISD::SRA,
1532                          ISD::SRL,           ISD::SHL,
1533                          ISD::STORE,         ISD::SPLAT_VECTOR,
1534                          ISD::BUILD_VECTOR,  ISD::CONCAT_VECTORS,
1535                          ISD::VP_STORE,      ISD::EXPERIMENTAL_VP_REVERSE,
1536                          ISD::MUL,           ISD::SDIV,
1537                          ISD::UDIV,          ISD::SREM,
1538                          ISD::UREM,          ISD::INSERT_VECTOR_ELT,
1539                          ISD::ABS,           ISD::CTPOP,
1540                          ISD::VECTOR_SHUFFLE, ISD::VSELECT});
1541 
1542   if (Subtarget.hasVendorXTHeadMemPair())
1543     setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1544   if (Subtarget.useRVVForFixedLengthVectors())
1545     setTargetDAGCombine(ISD::BITCAST);
1546 
1547   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1548   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1549 
1550   // Disable strict node mutation.
1551   IsStrictFPEnabled = true;
1552   EnableExtLdPromotion = true;
1553 
1554   // Let the subtarget decide if a predictable select is more expensive than the
1555   // corresponding branch. This information is used in CGP/SelectOpt to decide
1556   // when to convert selects into branches.
1557   PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1558 
1559   MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1560   MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1561 
1562   MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1563   MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1564   MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1565 
1566   MaxStoresPerMemmoveOptSize =
1567       Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1568   MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1569 
1570   MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1571   MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1572 }
1573 
1574 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1575                                             LLVMContext &Context,
1576                                             EVT VT) const {
1577   if (!VT.isVector())
1578     return getPointerTy(DL);
1579   if (Subtarget.hasVInstructions() &&
1580       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1581     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1582   return VT.changeVectorElementTypeToInteger();
1583 }
1584 
1585 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1586   return Subtarget.getXLenVT();
1587 }
1588 
1589 // Return false if we can lower get_vector_length to a vsetvli intrinsic.
1590 bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1591                                                       unsigned VF,
1592                                                       bool IsScalable) const {
1593   if (!Subtarget.hasVInstructions())
1594     return true;
1595 
1596   if (!IsScalable)
1597     return true;
1598 
1599   if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1600     return true;
1601 
1602   // Don't allow VF=1 if those types aren't legal.
1603   if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1604     return true;
1605 
1606   // VLEN=32 support is incomplete.
1607   if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1608     return true;
1609 
1610   // The maximum VF is for the smallest element width with LMUL=8.
1611   // VF must be a power of 2.
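       // e.g. with RVVBitsPerBlock=64 this gives MaxVF = 64 (SEW=8 with LMUL=8).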
1612   unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1613   return VF > MaxVF || !isPowerOf2_32(VF);
1614 }
1615 
1616 bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1617   return !Subtarget.hasVInstructions() ||
1618          VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1619 }
1620 
1621 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1622                                              const CallInst &I,
1623                                              MachineFunction &MF,
1624                                              unsigned Intrinsic) const {
1625   auto &DL = I.getDataLayout();
1626 
1627   auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1628                                  bool IsUnitStrided, bool UsePtrVal = false) {
1629     Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1630     // We can't use ptrVal if the intrinsic can access memory before the
1631     // pointer. This means we can't use it for strided or indexed intrinsics.
1632     if (UsePtrVal)
1633       Info.ptrVal = I.getArgOperand(PtrOp);
1634     else
1635       Info.fallbackAddressSpace =
1636           I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1637     Type *MemTy;
1638     if (IsStore) {
1639       // Store value is the first operand.
1640       MemTy = I.getArgOperand(0)->getType();
1641     } else {
1642       // Use the return type. If it's a segment load, the return type is a struct.
1643       MemTy = I.getType();
1644       if (MemTy->isStructTy())
1645         MemTy = MemTy->getStructElementType(0);
1646     }
1647     if (!IsUnitStrided)
1648       MemTy = MemTy->getScalarType();
1649 
1650     Info.memVT = getValueType(DL, MemTy);
1651     if (MemTy->isTargetExtTy()) {
1652       // A RISC-V vector tuple type's alignment should be that of its element type.
1653       if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1654         MemTy = Type::getIntNTy(
1655             MemTy->getContext(),
1656             1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1657                      ->getZExtValue());
1658       Info.align = DL.getABITypeAlign(MemTy);
1659     } else {
1660       Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1661     }
1662     Info.size = MemoryLocation::UnknownSize;
1663     Info.flags |=
1664         IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1665     return true;
1666   };
1667 
1668   if (I.hasMetadata(LLVMContext::MD_nontemporal))
1669     Info.flags |= MachineMemOperand::MONonTemporal;
1670 
1671   Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1672   switch (Intrinsic) {
1673   default:
1674     return false;
1675   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1676   case Intrinsic::riscv_masked_atomicrmw_add_i32:
1677   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1678   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1679   case Intrinsic::riscv_masked_atomicrmw_max_i32:
1680   case Intrinsic::riscv_masked_atomicrmw_min_i32:
1681   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1682   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1683   case Intrinsic::riscv_masked_cmpxchg_i32:
1684     Info.opc = ISD::INTRINSIC_W_CHAIN;
1685     Info.memVT = MVT::i32;
1686     Info.ptrVal = I.getArgOperand(0);
1687     Info.offset = 0;
1688     Info.align = Align(4);
1689     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1690                  MachineMemOperand::MOVolatile;
1691     return true;
1692   case Intrinsic::riscv_seg2_load:
1693   case Intrinsic::riscv_seg3_load:
1694   case Intrinsic::riscv_seg4_load:
1695   case Intrinsic::riscv_seg5_load:
1696   case Intrinsic::riscv_seg6_load:
1697   case Intrinsic::riscv_seg7_load:
1698   case Intrinsic::riscv_seg8_load:
1699     return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1700                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1701   case Intrinsic::riscv_seg2_store:
1702   case Intrinsic::riscv_seg3_store:
1703   case Intrinsic::riscv_seg4_store:
1704   case Intrinsic::riscv_seg5_store:
1705   case Intrinsic::riscv_seg6_store:
1706   case Intrinsic::riscv_seg7_store:
1707   case Intrinsic::riscv_seg8_store:
1708     // Operands are (vec, ..., vec, ptr, vl)
1709     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1710                                /*IsStore*/ true,
1711                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1712   case Intrinsic::riscv_vle:
1713   case Intrinsic::riscv_vle_mask:
1714   case Intrinsic::riscv_vleff:
1715   case Intrinsic::riscv_vleff_mask:
1716     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1717                                /*IsStore*/ false,
1718                                /*IsUnitStrided*/ true,
1719                                /*UsePtrVal*/ true);
1720   case Intrinsic::riscv_vse:
1721   case Intrinsic::riscv_vse_mask:
1722     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1723                                /*IsStore*/ true,
1724                                /*IsUnitStrided*/ true,
1725                                /*UsePtrVal*/ true);
1726   case Intrinsic::riscv_vlse:
1727   case Intrinsic::riscv_vlse_mask:
1728   case Intrinsic::riscv_vloxei:
1729   case Intrinsic::riscv_vloxei_mask:
1730   case Intrinsic::riscv_vluxei:
1731   case Intrinsic::riscv_vluxei_mask:
1732     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1733                                /*IsStore*/ false,
1734                                /*IsUnitStrided*/ false);
1735   case Intrinsic::riscv_vsse:
1736   case Intrinsic::riscv_vsse_mask:
1737   case Intrinsic::riscv_vsoxei:
1738   case Intrinsic::riscv_vsoxei_mask:
1739   case Intrinsic::riscv_vsuxei:
1740   case Intrinsic::riscv_vsuxei_mask:
1741     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1742                                /*IsStore*/ true,
1743                                /*IsUnitStrided*/ false);
1744   case Intrinsic::riscv_vlseg2:
1745   case Intrinsic::riscv_vlseg3:
1746   case Intrinsic::riscv_vlseg4:
1747   case Intrinsic::riscv_vlseg5:
1748   case Intrinsic::riscv_vlseg6:
1749   case Intrinsic::riscv_vlseg7:
1750   case Intrinsic::riscv_vlseg8:
1751   case Intrinsic::riscv_vlseg2ff:
1752   case Intrinsic::riscv_vlseg3ff:
1753   case Intrinsic::riscv_vlseg4ff:
1754   case Intrinsic::riscv_vlseg5ff:
1755   case Intrinsic::riscv_vlseg6ff:
1756   case Intrinsic::riscv_vlseg7ff:
1757   case Intrinsic::riscv_vlseg8ff:
1758     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1759                                /*IsStore*/ false,
1760                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1761   case Intrinsic::riscv_vlseg2_mask:
1762   case Intrinsic::riscv_vlseg3_mask:
1763   case Intrinsic::riscv_vlseg4_mask:
1764   case Intrinsic::riscv_vlseg5_mask:
1765   case Intrinsic::riscv_vlseg6_mask:
1766   case Intrinsic::riscv_vlseg7_mask:
1767   case Intrinsic::riscv_vlseg8_mask:
1768   case Intrinsic::riscv_vlseg2ff_mask:
1769   case Intrinsic::riscv_vlseg3ff_mask:
1770   case Intrinsic::riscv_vlseg4ff_mask:
1771   case Intrinsic::riscv_vlseg5ff_mask:
1772   case Intrinsic::riscv_vlseg6ff_mask:
1773   case Intrinsic::riscv_vlseg7ff_mask:
1774   case Intrinsic::riscv_vlseg8ff_mask:
1775     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1776                                /*IsStore*/ false,
1777                                /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1778   case Intrinsic::riscv_vlsseg2:
1779   case Intrinsic::riscv_vlsseg3:
1780   case Intrinsic::riscv_vlsseg4:
1781   case Intrinsic::riscv_vlsseg5:
1782   case Intrinsic::riscv_vlsseg6:
1783   case Intrinsic::riscv_vlsseg7:
1784   case Intrinsic::riscv_vlsseg8:
1785   case Intrinsic::riscv_vloxseg2:
1786   case Intrinsic::riscv_vloxseg3:
1787   case Intrinsic::riscv_vloxseg4:
1788   case Intrinsic::riscv_vloxseg5:
1789   case Intrinsic::riscv_vloxseg6:
1790   case Intrinsic::riscv_vloxseg7:
1791   case Intrinsic::riscv_vloxseg8:
1792   case Intrinsic::riscv_vluxseg2:
1793   case Intrinsic::riscv_vluxseg3:
1794   case Intrinsic::riscv_vluxseg4:
1795   case Intrinsic::riscv_vluxseg5:
1796   case Intrinsic::riscv_vluxseg6:
1797   case Intrinsic::riscv_vluxseg7:
1798   case Intrinsic::riscv_vluxseg8:
1799     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1800                                /*IsStore*/ false,
1801                                /*IsUnitStrided*/ false);
1802   case Intrinsic::riscv_vlsseg2_mask:
1803   case Intrinsic::riscv_vlsseg3_mask:
1804   case Intrinsic::riscv_vlsseg4_mask:
1805   case Intrinsic::riscv_vlsseg5_mask:
1806   case Intrinsic::riscv_vlsseg6_mask:
1807   case Intrinsic::riscv_vlsseg7_mask:
1808   case Intrinsic::riscv_vlsseg8_mask:
1809   case Intrinsic::riscv_vloxseg2_mask:
1810   case Intrinsic::riscv_vloxseg3_mask:
1811   case Intrinsic::riscv_vloxseg4_mask:
1812   case Intrinsic::riscv_vloxseg5_mask:
1813   case Intrinsic::riscv_vloxseg6_mask:
1814   case Intrinsic::riscv_vloxseg7_mask:
1815   case Intrinsic::riscv_vloxseg8_mask:
1816   case Intrinsic::riscv_vluxseg2_mask:
1817   case Intrinsic::riscv_vluxseg3_mask:
1818   case Intrinsic::riscv_vluxseg4_mask:
1819   case Intrinsic::riscv_vluxseg5_mask:
1820   case Intrinsic::riscv_vluxseg6_mask:
1821   case Intrinsic::riscv_vluxseg7_mask:
1822   case Intrinsic::riscv_vluxseg8_mask:
1823     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1824                                /*IsStore*/ false,
1825                                /*IsUnitStrided*/ false);
1826   case Intrinsic::riscv_vsseg2:
1827   case Intrinsic::riscv_vsseg3:
1828   case Intrinsic::riscv_vsseg4:
1829   case Intrinsic::riscv_vsseg5:
1830   case Intrinsic::riscv_vsseg6:
1831   case Intrinsic::riscv_vsseg7:
1832   case Intrinsic::riscv_vsseg8:
1833     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1834                                /*IsStore*/ true,
1835                                /*IsUnitStrided*/ false);
1836   case Intrinsic::riscv_vsseg2_mask:
1837   case Intrinsic::riscv_vsseg3_mask:
1838   case Intrinsic::riscv_vsseg4_mask:
1839   case Intrinsic::riscv_vsseg5_mask:
1840   case Intrinsic::riscv_vsseg6_mask:
1841   case Intrinsic::riscv_vsseg7_mask:
1842   case Intrinsic::riscv_vsseg8_mask:
1843     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1844                                /*IsStore*/ true,
1845                                /*IsUnitStrided*/ false);
1846   case Intrinsic::riscv_vssseg2:
1847   case Intrinsic::riscv_vssseg3:
1848   case Intrinsic::riscv_vssseg4:
1849   case Intrinsic::riscv_vssseg5:
1850   case Intrinsic::riscv_vssseg6:
1851   case Intrinsic::riscv_vssseg7:
1852   case Intrinsic::riscv_vssseg8:
1853   case Intrinsic::riscv_vsoxseg2:
1854   case Intrinsic::riscv_vsoxseg3:
1855   case Intrinsic::riscv_vsoxseg4:
1856   case Intrinsic::riscv_vsoxseg5:
1857   case Intrinsic::riscv_vsoxseg6:
1858   case Intrinsic::riscv_vsoxseg7:
1859   case Intrinsic::riscv_vsoxseg8:
1860   case Intrinsic::riscv_vsuxseg2:
1861   case Intrinsic::riscv_vsuxseg3:
1862   case Intrinsic::riscv_vsuxseg4:
1863   case Intrinsic::riscv_vsuxseg5:
1864   case Intrinsic::riscv_vsuxseg6:
1865   case Intrinsic::riscv_vsuxseg7:
1866   case Intrinsic::riscv_vsuxseg8:
1867     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1868                                /*IsStore*/ true,
1869                                /*IsUnitStrided*/ false);
1870   case Intrinsic::riscv_vssseg2_mask:
1871   case Intrinsic::riscv_vssseg3_mask:
1872   case Intrinsic::riscv_vssseg4_mask:
1873   case Intrinsic::riscv_vssseg5_mask:
1874   case Intrinsic::riscv_vssseg6_mask:
1875   case Intrinsic::riscv_vssseg7_mask:
1876   case Intrinsic::riscv_vssseg8_mask:
1877   case Intrinsic::riscv_vsoxseg2_mask:
1878   case Intrinsic::riscv_vsoxseg3_mask:
1879   case Intrinsic::riscv_vsoxseg4_mask:
1880   case Intrinsic::riscv_vsoxseg5_mask:
1881   case Intrinsic::riscv_vsoxseg6_mask:
1882   case Intrinsic::riscv_vsoxseg7_mask:
1883   case Intrinsic::riscv_vsoxseg8_mask:
1884   case Intrinsic::riscv_vsuxseg2_mask:
1885   case Intrinsic::riscv_vsuxseg3_mask:
1886   case Intrinsic::riscv_vsuxseg4_mask:
1887   case Intrinsic::riscv_vsuxseg5_mask:
1888   case Intrinsic::riscv_vsuxseg6_mask:
1889   case Intrinsic::riscv_vsuxseg7_mask:
1890   case Intrinsic::riscv_vsuxseg8_mask:
1891     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1892                                /*IsStore*/ true,
1893                                /*IsUnitStrided*/ false);
1894   }
1895 }
1896 
1897 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1898                                                 const AddrMode &AM, Type *Ty,
1899                                                 unsigned AS,
1900                                                 Instruction *I) const {
1901   // No global is ever allowed as a base.
1902   if (AM.BaseGV)
1903     return false;
1904 
1905   // None of our addressing modes allows a scalable offset.
1906   if (AM.ScalableOffset)
1907     return false;
1908 
1909   // RVV instructions only support register addressing.
1910   if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1911     return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1912 
1913   // Require a 12-bit signed offset.
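       // e.g. an offset of 2047 can be folded into the load/store, but 2048 cannot.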
1914   if (!isInt<12>(AM.BaseOffs))
1915     return false;
1916 
1917   switch (AM.Scale) {
1918   case 0: // "r+i" or just "i", depending on HasBaseReg.
1919     break;
1920   case 1:
1921     if (!AM.HasBaseReg) // allow "r+i".
1922       break;
1923     return false; // disallow "r+r" or "r+r+i".
1924   default:
1925     return false;
1926   }
1927 
1928   return true;
1929 }
1930 
1931 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1932   return isInt<12>(Imm);
1933 }
1934 
1935 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1936   return isInt<12>(Imm);
1937 }
1938 
1939 // On RV32, 64-bit integers are split into their high and low parts and held
1940 // in two different registers, so the trunc is free since the low register can
1941 // just be used.
1942 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1943 // isTruncateFree?
1944 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1945   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1946     return false;
1947   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1948   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1949   return (SrcBits == 64 && DestBits == 32);
1950 }
1951 
1952 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1953   // We consider i64->i32 free on RV64 since we have good selection of W
1954   // instructions that make promoting operations back to i64 free in many cases.
1955   if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1956       !DstVT.isInteger())
1957     return false;
1958   unsigned SrcBits = SrcVT.getSizeInBits();
1959   unsigned DestBits = DstVT.getSizeInBits();
1960   return (SrcBits == 64 && DestBits == 32);
1961 }
1962 
1963 bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
1964   EVT SrcVT = Val.getValueType();
1965   // Truncates that can be folded into a vnsrl or vnsra are free.
1966   if (Subtarget.hasVInstructions() &&
1967       (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1968       SrcVT.isVector() && VT2.isVector()) {
1969     unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1970     unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1971     if (SrcBits == DestBits * 2) {
1972       return true;
1973     }
1974   }
1975   return TargetLowering::isTruncateFree(Val, VT2);
1976 }
1977 
1978 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1979   // Zexts are free if they can be combined with a load.
1980   // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1981   // poorly with type legalization of compares preferring sext.
1982   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1983     EVT MemVT = LD->getMemoryVT();
1984     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1985         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1986          LD->getExtensionType() == ISD::ZEXTLOAD))
1987       return true;
1988   }
1989 
1990   return TargetLowering::isZExtFree(Val, VT2);
1991 }
1992 
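     // On RV64, sign extending i32 to i64 is a single sext.w/addiw, while zero
     // extending it requires two shifts unless Zba's add.uw is available.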
1993 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1994   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1995 }
1996 
1997 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1998   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1999 }
2000 
2001 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
2002   return Subtarget.hasStdExtZbb() ||
2003          (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2004 }
2005 
2006 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
2007   return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2008          (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2009 }
2010 
2011 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2012     const Instruction &AndI) const {
2013   // We expect to be able to match a bit extraction instruction if the Zbs
2014   // extension is supported and the mask is a power of two. However, we
2015   // conservatively return false if the mask would fit in an ANDI instruction,
2016   // on the basis that it's possible the sinking+duplication of the AND in
2017   // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2018   // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2019   if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2020     return false;
2021   ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2022   if (!Mask)
2023     return false;
2024   return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2025 }
2026 
2027 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
2028   EVT VT = Y.getValueType();
2029 
2030   // FIXME: Support vectors once we have tests.
2031   if (VT.isVector())
2032     return false;
2033 
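       // Zbb and Zbkb both provide ANDN, so (X & ~Y) is a single instruction. If Y
       // is a plain (non-opaque) constant, ~Y folds to a constant and ANDN is not
       // needed.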
2034   return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2035          (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2036 }
2037 
2038 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2039   // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2040   if (Subtarget.hasStdExtZbs())
2041     return X.getValueType().isScalarInteger();
2042   auto *C = dyn_cast<ConstantSDNode>(Y);
2043   // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
2044   if (Subtarget.hasVendorXTHeadBs())
2045     return C != nullptr;
2046   // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
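       // ANDI takes a 12-bit signed immediate, so (1 << Y) is only encodable for
       // Y <= 10.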
2047   return C && C->getAPIntValue().ule(10);
2048 }
2049 
2050 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
2051                                                                EVT VT) const {
2052   // Only enable for rvv.
2053   if (!VT.isVector() || !Subtarget.hasVInstructions())
2054     return false;
2055 
2056   if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2057     return false;
2058 
2059   return true;
2060 }
2061 
2062 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
2063                                                             Type *Ty) const {
2064   assert(Ty->isIntegerTy());
2065 
2066   unsigned BitSize = Ty->getIntegerBitWidth();
2067   if (BitSize > Subtarget.getXLen())
2068     return false;
2069 
2070   // Fast path, assume 32-bit immediates are cheap.
2071   int64_t Val = Imm.getSExtValue();
2072   if (isInt<32>(Val))
2073     return true;
2074 
2075   // A constant pool entry may be more aligned than the load we're trying to
2076   // replace. If we don't support unaligned scalar mem, prefer the constant
2077   // pool.
2078   // TODO: Can the caller pass down the alignment?
2079   if (!Subtarget.enableUnalignedScalarMem())
2080     return true;
2081 
2082   // Prefer to keep the load if it would require many instructions.
2083   // This uses the same threshold we use for constant pools but doesn't
2084   // check useConstantPoolForLargeInts.
2085   // TODO: Should we keep the load only when we're definitely going to emit a
2086   // constant pool?
2087 
2088   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2089   return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2090 }
2091 
2092 bool RISCVTargetLowering::
2093     shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
2094         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
2095         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2096         SelectionDAG &DAG) const {
2097   // One interesting pattern that we'd want to form is 'bit extract':
2098   //   ((1 >> Y) & 1) ==/!= 0
2099   // But we also need to be careful not to try to reverse that fold.
2100 
2101   // Is this '((1 >> Y) & 1)'?
2102   if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2103     return false; // Keep the 'bit extract' pattern.
2104 
2105   // Will this be '((1 >> Y) & 1)' after the transform?
2106   if (NewShiftOpcode == ISD::SRL && CC->isOne())
2107     return true; // Do form the 'bit extract' pattern.
2108 
2109   // If 'X' is a constant, and we transform, then we will immediately
2110   // try to undo the fold, thus causing endless combine loop.
2111   // So only do the transform if X is not a constant. This matches the default
2112   // implementation of this function.
2113   return !XC;
2114 }
2115 
2116 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2117   unsigned Opc = VecOp.getOpcode();
2118 
2119   // Assume target opcodes can't be scalarized.
2120   // TODO - do we have any exceptions?
2121   if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2122     return false;
2123 
2124   // If the vector op is not supported, try to convert to scalar.
2125   EVT VecVT = VecOp.getValueType();
2126   if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2127     return true;
2128 
2129   // If the vector op is supported, but the scalar op is not, the transform may
2130   // not be worthwhile.
2131   // Permit a vector binary operation to be converted to a scalar binary
2132   // operation that is custom lowered with an illegal type.
2133   EVT ScalarVT = VecVT.getScalarType();
2134   return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2135          isOperationCustom(Opc, ScalarVT);
2136 }
2137 
2138 bool RISCVTargetLowering::isOffsetFoldingLegal(
2139     const GlobalAddressSDNode *GA) const {
2140   // In order to maximise the opportunity for common subexpression elimination,
2141   // keep a separate ADD node for the global address offset instead of folding
2142   // it in the global address node. Later peephole optimisations may choose to
2143   // fold it back in when profitable.
2144   return false;
2145 }
2146 
2147 // Returns 0-31 if the fli instruction is available for the type and this is a
2148 // legal FP immediate for the type. Returns -1 otherwise.
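     // For example, with Zfa +1.0 is one of the 32 constants encodable by fli.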
2149 int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
2150   if (!Subtarget.hasStdExtZfa())
2151     return -1;
2152 
2153   bool IsSupportedVT = false;
2154   if (VT == MVT::f16) {
2155     IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2156   } else if (VT == MVT::f32) {
2157     IsSupportedVT = true;
2158   } else if (VT == MVT::f64) {
2159     assert(Subtarget.hasStdExtD() && "Expect D extension");
2160     IsSupportedVT = true;
2161   }
2162 
2163   if (!IsSupportedVT)
2164     return -1;
2165 
2166   return RISCVLoadFPImm::getLoadFPImm(Imm);
2167 }
2168 
2169 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2170                                        bool ForCodeSize) const {
2171   bool IsLegalVT = false;
2172   if (VT == MVT::f16)
2173     IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2174   else if (VT == MVT::f32)
2175     IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2176   else if (VT == MVT::f64)
2177     IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2178   else if (VT == MVT::bf16)
2179     IsLegalVT = Subtarget.hasStdExtZfbfmin();
2180 
2181   if (!IsLegalVT)
2182     return false;
2183 
2184   if (getLegalZfaFPImm(Imm, VT) >= 0)
2185     return true;
2186 
2187   // Cannot create a 64-bit floating-point immediate value for RV32.
2188   if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2189     // td can handle +0.0 or -0.0 already.
2190     // -0.0 can be created by fmv + fneg.
2191     return Imm.isZero();
2192   }
2193 
2194   // Special case: fmv + fneg
2195   if (Imm.isNegZero())
2196     return true;
2197 
2198   // Building an integer and then converting requires a fmv at the end of
2199   // the integer sequence. The fmv is not required for Zfinx.
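       // e.g. +1.0f (0x3f800000) needs a single LUI plus the FMV, cost 2, which
       // fits the default threshold of 2.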
2200   const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2201   const int Cost =
2202       FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2203                                            Subtarget.getXLen(), Subtarget);
2204   return Cost <= FPImmCost;
2205 }
2206 
2207 // TODO: This is very conservative.
2208 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2209                                                   unsigned Index) const {
2210   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2211     return false;
2212 
2213   // Only support extracting a fixed from a fixed vector for now.
2214   if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2215     return false;
2216 
2217   EVT EltVT = ResVT.getVectorElementType();
2218   assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2219 
2220   // The smallest type we can slide is i8.
2221   // TODO: We can extract index 0 from a mask vector without a slide.
2222   if (EltVT == MVT::i1)
2223     return false;
2224 
2225   unsigned ResElts = ResVT.getVectorNumElements();
2226   unsigned SrcElts = SrcVT.getVectorNumElements();
2227 
2228   unsigned MinVLen = Subtarget.getRealMinVLen();
2229   unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2230 
2231   // If we're extracting only data from the first VLEN bits of the source
2232   // then we can always do this with an m1 vslidedown.vx.  Restricting the
2233   // Index ensures we can use a vslidedown.vi.
2234   // TODO: We can generalize this when the exact VLEN is known.
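       // e.g. with a 128-bit minimum VLEN and i32 elements, MinVLMAX is 4, so
       // extracting a v2i32 at index 0 or 2 is cheap.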
2235   if (Index + ResElts <= MinVLMAX && Index < 31)
2236     return true;
2237 
2238   // Conservatively only handle extracting half of a vector.
2239   // TODO: We can do arbitrary slidedowns, but for now only support extracting
2240   // the upper half of a vector until we have more test coverage.
2241   // TODO: For sizes which aren't multiples of VLEN, this may not be a cheap
2242   // extract.  However, this case is important in practice for shuffled
2243   // extracts of longer vectors.  How should we resolve this?
2244   return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2245 }
2246 
2247 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2248                                                       CallingConv::ID CC,
2249                                                       EVT VT) const {
2250   // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2251   // We might still end up using a GPR but that will be decided based on ABI.
2252   if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2253       !Subtarget.hasStdExtZfhminOrZhinxmin())
2254     return MVT::f32;
2255 
2256   MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2257 
2258   return PartVT;
2259 }
2260 
2261 unsigned
2262 RISCVTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT,
2263                                      std::optional<MVT> RegisterVT) const {
2264   // Pair inline assembly operand
2265   if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2266       *RegisterVT == MVT::Untyped)
2267     return 1;
2268 
2269   return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2270 }
2271 
2272 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2273                                                            CallingConv::ID CC,
2274                                                            EVT VT) const {
2275   // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2276   // We might still end up using a GPR but that will be decided based on ABI.
2277   if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2278       !Subtarget.hasStdExtZfhminOrZhinxmin())
2279     return 1;
2280 
2281   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2282 }
2283 
2284 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2285     LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2286     unsigned &NumIntermediates, MVT &RegisterVT) const {
2287   unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2288       Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2289 
2290   return NumRegs;
2291 }
2292 
2293 // Changes the condition code and swaps operands if necessary, so the SetCC
2294 // operation matches one of the comparisons supported directly by branches
2295 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2296 // with 1/-1.
2297 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2298                                     ISD::CondCode &CC, SelectionDAG &DAG) {
2299   // If this is a single bit test that can't be handled by ANDI, shift the
2300   // bit to be tested to the MSB and perform a signed compare with 0.
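       // For example, on RV64 (X & 0x800) != 0 becomes (X << 52) < 0.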
2301   if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2302       LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2303       isa<ConstantSDNode>(LHS.getOperand(1))) {
2304     uint64_t Mask = LHS.getConstantOperandVal(1);
2305     if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2306       unsigned ShAmt = 0;
2307       if (isPowerOf2_64(Mask)) {
2308         CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2309         ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2310       } else {
2311         ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2312       }
2313 
2314       LHS = LHS.getOperand(0);
2315       if (ShAmt != 0)
2316         LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2317                           DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2318       return;
2319     }
2320   }
2321 
2322   if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2323     int64_t C = RHSC->getSExtValue();
2324     switch (CC) {
2325     default: break;
2326     case ISD::SETGT:
2327       // Convert X > -1 to X >= 0.
2328       if (C == -1) {
2329         RHS = DAG.getConstant(0, DL, RHS.getValueType());
2330         CC = ISD::SETGE;
2331         return;
2332       }
2333       break;
2334     case ISD::SETLT:
2335       // Convert X < 1 to 0 >= X.
2336       if (C == 1) {
2337         RHS = LHS;
2338         LHS = DAG.getConstant(0, DL, RHS.getValueType());
2339         CC = ISD::SETGE;
2340         return;
2341       }
2342       break;
2343     }
2344   }
2345 
2346   switch (CC) {
2347   default:
2348     break;
2349   case ISD::SETGT:
2350   case ISD::SETLE:
2351   case ISD::SETUGT:
2352   case ISD::SETULE:
2353     CC = ISD::getSetCCSwappedOperands(CC);
2354     std::swap(LHS, RHS);
2355     break;
2356   }
2357 }
2358 
2359 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2360   if (VT.isRISCVVectorTuple()) {
2361     if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2362         VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2363       return RISCVII::LMUL_F8;
2364     if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2365         VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2366       return RISCVII::LMUL_F4;
2367     if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2368         VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2369       return RISCVII::LMUL_F2;
2370     if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2371         VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2372       return RISCVII::LMUL_1;
2373     if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2374         VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2375       return RISCVII::LMUL_2;
2376     if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2377       return RISCVII::LMUL_4;
2378     llvm_unreachable("Invalid vector tuple type LMUL.");
2379   }
2380 
2381   assert(VT.isScalableVector() && "Expecting a scalable vector type");
2382   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
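       // An i1 vector is given the LMUL of the i8 vector with the same element
       // count, e.g. nxv16i1 maps to LMUL_2 just like nxv16i8.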
2383   if (VT.getVectorElementType() == MVT::i1)
2384     KnownSize *= 8;
2385 
2386   switch (KnownSize) {
2387   default:
2388     llvm_unreachable("Invalid LMUL.");
2389   case 8:
2390     return RISCVII::VLMUL::LMUL_F8;
2391   case 16:
2392     return RISCVII::VLMUL::LMUL_F4;
2393   case 32:
2394     return RISCVII::VLMUL::LMUL_F2;
2395   case 64:
2396     return RISCVII::VLMUL::LMUL_1;
2397   case 128:
2398     return RISCVII::VLMUL::LMUL_2;
2399   case 256:
2400     return RISCVII::VLMUL::LMUL_4;
2401   case 512:
2402     return RISCVII::VLMUL::LMUL_8;
2403   }
2404 }
2405 
2406 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2407   switch (LMul) {
2408   default:
2409     llvm_unreachable("Invalid LMUL.");
2410   case RISCVII::VLMUL::LMUL_F8:
2411   case RISCVII::VLMUL::LMUL_F4:
2412   case RISCVII::VLMUL::LMUL_F2:
2413   case RISCVII::VLMUL::LMUL_1:
2414     return RISCV::VRRegClassID;
2415   case RISCVII::VLMUL::LMUL_2:
2416     return RISCV::VRM2RegClassID;
2417   case RISCVII::VLMUL::LMUL_4:
2418     return RISCV::VRM4RegClassID;
2419   case RISCVII::VLMUL::LMUL_8:
2420     return RISCV::VRM8RegClassID;
2421   }
2422 }
2423 
2424 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2425   RISCVII::VLMUL LMUL = getLMUL(VT);
2426   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2427       LMUL == RISCVII::VLMUL::LMUL_F4 ||
2428       LMUL == RISCVII::VLMUL::LMUL_F2 ||
2429       LMUL == RISCVII::VLMUL::LMUL_1) {
2430     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2431                   "Unexpected subreg numbering");
2432     return RISCV::sub_vrm1_0 + Index;
2433   }
2434   if (LMUL == RISCVII::VLMUL::LMUL_2) {
2435     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2436                   "Unexpected subreg numbering");
2437     return RISCV::sub_vrm2_0 + Index;
2438   }
2439   if (LMUL == RISCVII::VLMUL::LMUL_4) {
2440     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2441                   "Unexpected subreg numbering");
2442     return RISCV::sub_vrm4_0 + Index;
2443   }
2444   llvm_unreachable("Invalid vector type.");
2445 }
2446 
2447 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2448   if (VT.isRISCVVectorTuple()) {
2449     unsigned NF = VT.getRISCVVectorTupleNumFields();
2450     unsigned RegsPerField =
2451         std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2452                          (NF * RISCV::RVVBitsPerBlock));
2453     switch (RegsPerField) {
2454     case 1:
2455       if (NF == 2)
2456         return RISCV::VRN2M1RegClassID;
2457       if (NF == 3)
2458         return RISCV::VRN3M1RegClassID;
2459       if (NF == 4)
2460         return RISCV::VRN4M1RegClassID;
2461       if (NF == 5)
2462         return RISCV::VRN5M1RegClassID;
2463       if (NF == 6)
2464         return RISCV::VRN6M1RegClassID;
2465       if (NF == 7)
2466         return RISCV::VRN7M1RegClassID;
2467       if (NF == 8)
2468         return RISCV::VRN8M1RegClassID;
2469       break;
2470     case 2:
2471       if (NF == 2)
2472         return RISCV::VRN2M2RegClassID;
2473       if (NF == 3)
2474         return RISCV::VRN3M2RegClassID;
2475       if (NF == 4)
2476         return RISCV::VRN4M2RegClassID;
2477       break;
2478     case 4:
2479       assert(NF == 2);
2480       return RISCV::VRN2M4RegClassID;
2481     default:
2482       break;
2483     }
2484     llvm_unreachable("Invalid vector tuple type RegClass.");
2485   }
2486 
2487   if (VT.getVectorElementType() == MVT::i1)
2488     return RISCV::VRRegClassID;
2489   return getRegClassIDForLMUL(getLMUL(VT));
2490 }
2491 
2492 // Attempt to decompose a subvector insert/extract between VecVT and
2493 // SubVecVT via subregister indices. Returns the subregister index that
2494 // can perform the subvector insert/extract with the given element index, as
2495 // well as the index corresponding to any leftover subvectors that must be
2496 // further inserted/extracted within the register class for SubVecVT.
2497 std::pair<unsigned, unsigned>
2498 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2499     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2500     const RISCVRegisterInfo *TRI) {
2501   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2502                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2503                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2504                 "Register classes not ordered");
2505   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2506   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2507 
2508   // If VecVT is a vector tuple type, either it is the tuple type with the
2509   // same RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2510   if (VecVT.isRISCVVectorTuple()) {
2511     if (VecRegClassID == SubRegClassID)
2512       return {RISCV::NoSubRegister, 0};
2513 
2514     assert(SubVecVT.isScalableVector() &&
2515            "Only allow scalable vector subvector.");
2516     assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2517            "Invalid vector tuple insert/extract for vector and subvector with "
2518            "different LMUL.");
2519     return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2520   }
2521 
2522   // Try to compose a subregister index that takes us from the incoming
2523   // LMUL>1 register class down to the outgoing one. At each step we halve
2524   // the LMUL:
2525   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2526   // Note that this is not guaranteed to find a subregister index, such as
2527   // when we are extracting from one VR type to another.
2528   unsigned SubRegIdx = RISCV::NoSubRegister;
2529   for (const unsigned RCID :
2530        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2531     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2532       VecVT = VecVT.getHalfNumVectorElementsVT();
2533       bool IsHi =
2534           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2535       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2536                                             getSubregIndexByMVT(VecVT, IsHi));
2537       if (IsHi)
2538         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2539     }
2540   return {SubRegIdx, InsertExtractIdx};
2541 }
2542 
2543 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2544 // stores for those types.
2545 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2546   return !Subtarget.useRVVForFixedLengthVectors() ||
2547          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2548 }
2549 
2550 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2551   if (!ScalarTy.isSimple())
2552     return false;
2553   switch (ScalarTy.getSimpleVT().SimpleTy) {
2554   case MVT::iPTR:
2555     return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2556   case MVT::i8:
2557   case MVT::i16:
2558   case MVT::i32:
2559     return true;
2560   case MVT::i64:
2561     return Subtarget.hasVInstructionsI64();
2562   case MVT::f16:
2563     return Subtarget.hasVInstructionsF16Minimal();
2564   case MVT::bf16:
2565     return Subtarget.hasVInstructionsBF16Minimal();
2566   case MVT::f32:
2567     return Subtarget.hasVInstructionsF32();
2568   case MVT::f64:
2569     return Subtarget.hasVInstructionsF64();
2570   default:
2571     return false;
2572   }
2573 }
2574 
2575 
2576 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2577   return NumRepeatedDivisors;
2578 }
2579 
2580 static SDValue getVLOperand(SDValue Op) {
2581   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2582           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2583          "Unexpected opcode");
2584   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2585   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2586   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2587       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2588   if (!II)
2589     return SDValue();
2590   return Op.getOperand(II->VLOperand + 1 + HasChain);
2591 }
2592 
2593 static bool useRVVForFixedLengthVectorVT(MVT VT,
2594                                          const RISCVSubtarget &Subtarget) {
2595   assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2596   if (!Subtarget.useRVVForFixedLengthVectors())
2597     return false;
2598 
2599   // We only support a set of vector types with a consistent maximum fixed size
2600   // across all supported vector element types to avoid legalization issues.
2601   // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2602   // fixed-length vector type we support is 1024 bytes.
2603   if (VT.getFixedSizeInBits() > 1024 * 8)
2604     return false;
2605 
2606   unsigned MinVLen = Subtarget.getRealMinVLen();
2607 
2608   MVT EltVT = VT.getVectorElementType();
2609 
2610   // Don't use RVV for vectors we cannot scalarize if required.
2611   switch (EltVT.SimpleTy) {
2612   // i1 is supported but has different rules.
2613   default:
2614     return false;
2615   case MVT::i1:
2616     // Masks can only use a single register.
2617     if (VT.getVectorNumElements() > MinVLen)
2618       return false;
2619     MinVLen /= 8;
2620     break;
2621   case MVT::i8:
2622   case MVT::i16:
2623   case MVT::i32:
2624     break;
2625   case MVT::i64:
2626     if (!Subtarget.hasVInstructionsI64())
2627       return false;
2628     break;
2629   case MVT::f16:
2630     if (!Subtarget.hasVInstructionsF16Minimal())
2631       return false;
2632     break;
2633   case MVT::bf16:
2634     if (!Subtarget.hasVInstructionsBF16Minimal())
2635       return false;
2636     break;
2637   case MVT::f32:
2638     if (!Subtarget.hasVInstructionsF32())
2639       return false;
2640     break;
2641   case MVT::f64:
2642     if (!Subtarget.hasVInstructionsF64())
2643       return false;
2644     break;
2645   }
2646 
2647   // Reject elements larger than ELEN.
2648   if (EltVT.getSizeInBits() > Subtarget.getELen())
2649     return false;
2650 
2651   unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2652   // Don't use RVV for types that don't fit.
2653   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2654     return false;
2655 
2656   // TODO: Perhaps an artificial restriction, but worth having whilst getting
2657   // the base fixed length RVV support in place.
2658   if (!VT.isPow2VectorType())
2659     return false;
2660 
2661   return true;
2662 }
2663 
2664 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2665   return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2666 }
2667 
2668 // Return the smallest legal scalable container type that can hold VT.
2669 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2670                                             const RISCVSubtarget &Subtarget) {
2671   // This may be called before legal types are setup.
2672   assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2673           useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2674          "Expected legal fixed length vector!");
2675 
2676   unsigned MinVLen = Subtarget.getRealMinVLen();
2677   unsigned MaxELen = Subtarget.getELen();
2678 
2679   MVT EltVT = VT.getVectorElementType();
2680   switch (EltVT.SimpleTy) {
2681   default:
2682     llvm_unreachable("unexpected element type for RVV container");
2683   case MVT::i1:
2684   case MVT::i8:
2685   case MVT::i16:
2686   case MVT::i32:
2687   case MVT::i64:
2688   case MVT::bf16:
2689   case MVT::f16:
2690   case MVT::f32:
2691   case MVT::f64: {
2692     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2693     // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2694     // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
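    // For example, with a minimum VLEN of 128 and ELEN of 64, v4i32 maps to
    // nxv2i32 (exactly one vector register) and v2i32 maps to nxv1i32 (a
    // fractional LMUL=1/2 container).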
2695     unsigned NumElts =
2696         (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2697     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2698     assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2699     return MVT::getScalableVectorVT(EltVT, NumElts);
2700   }
2701   }
2702 }
2703 
2704 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2705                                             const RISCVSubtarget &Subtarget) {
2706   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2707                                           Subtarget);
2708 }
2709 
2710 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2711   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2712 }
2713 
2714 // Grow V to consume an entire RVV register.
2715 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2716                                        const RISCVSubtarget &Subtarget) {
2717   assert(VT.isScalableVector() &&
2718          "Expected to convert into a scalable vector!");
2719   assert(V.getValueType().isFixedLengthVector() &&
2720          "Expected a fixed length vector operand!");
2721   SDLoc DL(V);
2722   SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2723   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2724 }
2725 
2726 // Shrink V so it's just big enough to maintain a VT's worth of data.
2727 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2728                                          const RISCVSubtarget &Subtarget) {
2729   assert(VT.isFixedLengthVector() &&
2730          "Expected to convert into a fixed length vector!");
2731   assert(V.getValueType().isScalableVector() &&
2732          "Expected a scalable vector operand!");
2733   SDLoc DL(V);
2734   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2735   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2736 }
2737 
2738 /// Return the mask type suitable for masking the provided vector type.
2739 /// This is simply an i1 element type vector of the same
2740 /// (possibly scalable) length.
2741 static MVT getMaskTypeFor(MVT VecVT) {
2742   assert(VecVT.isVector());
2743   ElementCount EC = VecVT.getVectorElementCount();
2744   return MVT::getVectorVT(MVT::i1, EC);
2745 }
2746 
2747 /// Creates an all-ones mask suitable for masking a vector of type VecVT with
2748 /// vector length VL.
2749 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2750                               SelectionDAG &DAG) {
2751   MVT MaskVT = getMaskTypeFor(VecVT);
2752   return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2753 }
2754 
2755 static std::pair<SDValue, SDValue>
2756 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2757                         const RISCVSubtarget &Subtarget) {
2758   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2759   SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2760   SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2761   return {Mask, VL};
2762 }
2763 
2764 static std::pair<SDValue, SDValue>
2765 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2766                 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2767   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2768   SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2769   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2770   return {Mask, VL};
2771 }
2772 
2773 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2774 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2775 // the vector type that the fixed-length vector is contained in. Otherwise if
2776 // VecVT is scalable, then ContainerVT should be the same as VecVT.
2777 static std::pair<SDValue, SDValue>
2778 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2779                 const RISCVSubtarget &Subtarget) {
2780   if (VecVT.isFixedLengthVector())
2781     return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2782                            Subtarget);
2783   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2784   return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2785 }
2786 
2787 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2788                                           SelectionDAG &DAG) const {
2789   assert(VecVT.isScalableVector() && "Expected scalable vector");
2790   return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2791                              VecVT.getVectorElementCount());
2792 }
2793 
2794 std::pair<unsigned, unsigned>
2795 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2796                                         const RISCVSubtarget &Subtarget) {
2797   assert(VecVT.isScalableVector() && "Expected scalable vector");
2798 
2799   unsigned EltSize = VecVT.getScalarSizeInBits();
2800   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2801 
2802   unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2803   unsigned MaxVLMAX =
2804       RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2805 
2806   unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2807   unsigned MinVLMAX =
2808       RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2809 
2810   return std::make_pair(MinVLMAX, MaxVLMAX);
2811 }
2812 
2813 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2814 // of either is (currently) supported. This can get us into an infinite loop
2815 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2816 // as a ..., etc.
2817 // Until either (or both) of these can reliably lower any node, reporting that
2818 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2819 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2820 // which is not desirable.
2821 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2822     EVT VT, unsigned DefinedValues) const {
2823   return false;
2824 }
2825 
2826 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2827   // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2828   // implementation-defined.
2829   if (!VT.isVector())
2830     return InstructionCost::getInvalid();
2831   unsigned DLenFactor = Subtarget.getDLenFactor();
2832   unsigned Cost;
2833   if (VT.isScalableVector()) {
2834     unsigned LMul;
2835     bool Fractional;
2836     std::tie(LMul, Fractional) =
2837         RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
2838     if (Fractional)
2839       Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2840     else
2841       Cost = (LMul * DLenFactor);
2842   } else {
2843     Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2844   }
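  // For illustration, assuming DLEN == VLEN/2 (a DLenFactor of 2): an LMUL_1
  // type costs 2, an LMUL_4 type costs 8, and fractional LMUL types cost 1.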
2845   return Cost;
2846 }
2847 
2848 
2849 /// Return the cost of a vrgather.vv instruction for the type VT.  vrgather.vv
2850 /// is generally quadratic in the number of vregs implied by LMUL.  Note that
2851 /// the operands (index and possibly mask) are handled separately.
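/// For example, an LMUL_4 type with a DLenFactor of 1 is estimated at
/// 4 * 4 = 16.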
2852 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2853   return getLMULCost(VT) * getLMULCost(VT);
2854 }
2855 
2856 /// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2857 /// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2858 /// or may track the vrgather.vv cost. It is implementation-dependent.
2859 InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2860   return getLMULCost(VT);
2861 }
2862 
2863 /// Return the cost of a vslidedown.vx or vslideup.vx instruction
2864 /// for the type VT.  (This does not cover the vslide1up or vslide1down
2865 /// variants.)  Slides may be linear in the number of vregs implied by LMUL,
2866 /// or may track the vrgather.vv cost. It is implementation-dependent.
2867 InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2868   return getLMULCost(VT);
2869 }
2870 
2871 /// Return the cost of a vslidedown.vi or vslideup.vi instruction
2872 /// for the type VT.  (This does not cover the vslide1up or vslide1down
2873 /// variants.)  Slides may be linear in the number of vregs implied by LMUL,
2874 /// or may track the vrgather.vv cost. It is implementation-dependent.
2875 InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2876   return getLMULCost(VT);
2877 }
2878 
2879 static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2880                               const RISCVSubtarget &Subtarget) {
2881   // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2882   // bf16 conversions are always promoted to f32.
2883   if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2884       Op.getValueType() == MVT::bf16) {
2885     bool IsStrict = Op->isStrictFPOpcode();
2886 
2887     SDLoc DL(Op);
2888     if (IsStrict) {
2889       SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2890                                 {Op.getOperand(0), Op.getOperand(1)});
2891       return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2892                          {Op.getValueType(), MVT::Other},
2893                          {Val.getValue(1), Val.getValue(0),
2894                           DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2895     }
2896     return DAG.getNode(
2897         ISD::FP_ROUND, DL, Op.getValueType(),
2898         DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2899         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2900   }
2901 
2902   // Other operations are legal.
2903   return Op;
2904 }
2905 
2906 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2907                                   const RISCVSubtarget &Subtarget) {
2908   // RISC-V FP-to-int conversions saturate to the destination register size, but
2909   // don't produce 0 for nan. We can use a conversion instruction and fix the
2910   // nan case with a compare and a select.
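  // For example, fcvt.w.s converts a NaN input to the largest positive value
  // (2^31 - 1), whereas the saturating ISD nodes require 0 for NaN, hence the
  // unordered compare and select below.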
2911   SDValue Src = Op.getOperand(0);
2912 
2913   MVT DstVT = Op.getSimpleValueType();
2914   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2915 
2916   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2917 
2918   if (!DstVT.isVector()) {
2919     // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2920     // the result.
2921     if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2922         Src.getValueType() == MVT::bf16) {
2923       Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2924     }
2925 
2926     unsigned Opc;
2927     if (SatVT == DstVT)
2928       Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2929     else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2930       Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2931     else
2932       return SDValue();
2933     // FIXME: Support other SatVTs by clamping before or after the conversion.
2934 
2935     SDLoc DL(Op);
2936     SDValue FpToInt = DAG.getNode(
2937         Opc, DL, DstVT, Src,
2938         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2939 
2940     if (Opc == RISCVISD::FCVT_WU_RV64)
2941       FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2942 
2943     SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2944     return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2945                            ISD::CondCode::SETUO);
2946   }
2947 
2948   // Vectors.
2949 
2950   MVT DstEltVT = DstVT.getVectorElementType();
2951   MVT SrcVT = Src.getSimpleValueType();
2952   MVT SrcEltVT = SrcVT.getVectorElementType();
2953   unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2954   unsigned DstEltSize = DstEltVT.getSizeInBits();
2955 
2956   // Only handle saturating to the destination type.
2957   if (SatVT != DstEltVT)
2958     return SDValue();
2959 
2960   MVT DstContainerVT = DstVT;
2961   MVT SrcContainerVT = SrcVT;
2962   if (DstVT.isFixedLengthVector()) {
2963     DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2964     SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2965     assert(DstContainerVT.getVectorElementCount() ==
2966                SrcContainerVT.getVectorElementCount() &&
2967            "Expected same element count");
2968     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2969   }
2970 
2971   SDLoc DL(Op);
2972 
2973   auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2974 
2975   SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2976                               {Src, Src, DAG.getCondCode(ISD::SETNE),
2977                                DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2978 
2979   // Need to widen by more than 1 step, promote the FP type, then do a widening
2980   // convert.
2981   if (DstEltSize > (2 * SrcEltSize)) {
2982     assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2983     MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2984     Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2985   }
2986 
2987   MVT CvtContainerVT = DstContainerVT;
2988   MVT CvtEltVT = DstEltVT;
2989   if (SrcEltSize > (2 * DstEltSize)) {
2990     CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2991     CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2992   }
2993 
2994   unsigned RVVOpc =
2995       IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2996   SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2997 
2998   while (CvtContainerVT != DstContainerVT) {
2999     CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3000     CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3001     // Rounding mode here is arbitrary since we aren't shifting out any bits.
3002     unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3003                                 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3004     Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3005   }
3006 
3007   SDValue SplatZero = DAG.getNode(
3008       RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3009       DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3010   Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3011                     Res, DAG.getUNDEF(DstContainerVT), VL);
3012 
3013   if (DstVT.isFixedLengthVector())
3014     Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3015 
3016   return Res;
3017 }
3018 
3019 static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
3020                               const RISCVSubtarget &Subtarget) {
3021   bool IsStrict = Op->isStrictFPOpcode();
3022   SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3023 
3024   // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3025   // bf16 conversions are always promoted to f32.
3026   if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3027       SrcVal.getValueType() == MVT::bf16) {
3028     SDLoc DL(Op);
3029     if (IsStrict) {
3030       SDValue Ext =
3031           DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3032                       {Op.getOperand(0), SrcVal});
3033       return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3034                          {Ext.getValue(1), Ext.getValue(0)});
3035     }
3036     return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3037                        DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3038   }
3039 
3040   // Other operations are legal.
3041   return Op;
3042 }
3043 
3044 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
3045   switch (Opc) {
3046   case ISD::FROUNDEVEN:
3047   case ISD::STRICT_FROUNDEVEN:
3048   case ISD::VP_FROUNDEVEN:
3049     return RISCVFPRndMode::RNE;
3050   case ISD::FTRUNC:
3051   case ISD::STRICT_FTRUNC:
3052   case ISD::VP_FROUNDTOZERO:
3053     return RISCVFPRndMode::RTZ;
3054   case ISD::FFLOOR:
3055   case ISD::STRICT_FFLOOR:
3056   case ISD::VP_FFLOOR:
3057     return RISCVFPRndMode::RDN;
3058   case ISD::FCEIL:
3059   case ISD::STRICT_FCEIL:
3060   case ISD::VP_FCEIL:
3061     return RISCVFPRndMode::RUP;
3062   case ISD::FROUND:
3063   case ISD::STRICT_FROUND:
3064   case ISD::VP_FROUND:
3065     return RISCVFPRndMode::RMM;
3066   case ISD::FRINT:
3067   case ISD::VP_FRINT:
3068     return RISCVFPRndMode::DYN;
3069   }
3070 
3071   return RISCVFPRndMode::Invalid;
3072 }
3073 
3074 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3075 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3076 // the integer domain and back, taking care to avoid converting values that are
3077 // nan or already correct.
3078 static SDValue
3079 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3080                                       const RISCVSubtarget &Subtarget) {
3081   MVT VT = Op.getSimpleValueType();
3082   assert(VT.isVector() && "Unexpected type");
3083 
3084   SDLoc DL(Op);
3085 
3086   SDValue Src = Op.getOperand(0);
3087 
3088   MVT ContainerVT = VT;
3089   if (VT.isFixedLengthVector()) {
3090     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3091     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3092   }
3093 
3094   SDValue Mask, VL;
3095   if (Op->isVPOpcode()) {
3096     Mask = Op.getOperand(1);
3097     if (VT.isFixedLengthVector())
3098       Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3099                                      Subtarget);
3100     VL = Op.getOperand(2);
3101   } else {
3102     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3103   }
3104 
3105   // Freeze the source since we are increasing the number of uses.
3106   Src = DAG.getFreeze(Src);
3107 
3108   // We do the conversion on the absolute value and fix the sign at the end.
3109   SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3110 
3111   // Determine the largest integer that can be represented exactly. This and
3112   // values larger than it don't have any fractional bits so don't need to
3113   // be converted.
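  // For f32 this is 2^23: every representable value with magnitude >= 2^23 is
  // already an integer.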
3114   const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3115   unsigned Precision = APFloat::semanticsPrecision(FltSem);
3116   APFloat MaxVal = APFloat(FltSem);
3117   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3118                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3119   SDValue MaxValNode =
3120       DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3121   SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3122                                     DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3123 
3124   // If abs(Src) was larger than MaxVal or nan, keep it.
3125   MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3126   Mask =
3127       DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3128                   {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3129                    Mask, Mask, VL});
3130 
3131   // Truncate to integer and convert back to FP.
3132   MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3133   MVT XLenVT = Subtarget.getXLenVT();
3134   SDValue Truncated;
3135 
3136   switch (Op.getOpcode()) {
3137   default:
3138     llvm_unreachable("Unexpected opcode");
3139   case ISD::FRINT:
3140   case ISD::VP_FRINT:
3141   case ISD::FCEIL:
3142   case ISD::VP_FCEIL:
3143   case ISD::FFLOOR:
3144   case ISD::VP_FFLOOR:
3145   case ISD::FROUND:
3146   case ISD::FROUNDEVEN:
3147   case ISD::VP_FROUND:
3148   case ISD::VP_FROUNDEVEN:
3149   case ISD::VP_FROUNDTOZERO: {
3150     RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3151     assert(FRM != RISCVFPRndMode::Invalid);
3152     Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3153                             DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3154     break;
3155   }
3156   case ISD::FTRUNC:
3157     Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3158                             Mask, VL);
3159     break;
3160   case ISD::FNEARBYINT:
3161   case ISD::VP_FNEARBYINT:
3162     Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3163                             Mask, VL);
3164     break;
3165   }
3166 
3167   // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3168   if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3169     Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3170                             Mask, VL);
3171 
3172   // Restore the original sign so that -0.0 is preserved.
3173   Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3174                           Src, Src, Mask, VL);
3175 
3176   if (!VT.isFixedLengthVector())
3177     return Truncated;
3178 
3179   return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3180 }
3181 
3182 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3183 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs of the source to
3184 // qNaNs and converting the new source to integer and back to FP.
3185 static SDValue
3186 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3187                                             const RISCVSubtarget &Subtarget) {
3188   SDLoc DL(Op);
3189   MVT VT = Op.getSimpleValueType();
3190   SDValue Chain = Op.getOperand(0);
3191   SDValue Src = Op.getOperand(1);
3192 
3193   MVT ContainerVT = VT;
3194   if (VT.isFixedLengthVector()) {
3195     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3197   }
3198 
3199   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3200 
3201   // Freeze the source since we are increasing the number of uses.
3202   Src = DAG.getFreeze(Src);
3203 
3204   // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3205   MVT MaskVT = Mask.getSimpleValueType();
3206   SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3207                                 DAG.getVTList(MaskVT, MVT::Other),
3208                                 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3209                                  DAG.getUNDEF(MaskVT), Mask, VL});
3210   Chain = Unorder.getValue(1);
3211   Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3212                     DAG.getVTList(ContainerVT, MVT::Other),
3213                     {Chain, Src, Src, Src, Unorder, VL});
3214   Chain = Src.getValue(1);
3215 
3216   // We do the conversion on the absolute value and fix the sign at the end.
3217   SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3218 
3219   // Determine the largest integer that can be represented exactly. This and
3220   // values larger than it don't have any fractional bits so don't need to
3221   // be converted.
3222   const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3223   unsigned Precision = APFloat::semanticsPrecision(FltSem);
3224   APFloat MaxVal = APFloat(FltSem);
3225   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3226                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3227   SDValue MaxValNode =
3228       DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3229   SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3230                                     DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3231 
3232   // If abs(Src) was larger than MaxVal or nan, keep it.
3233   Mask = DAG.getNode(
3234       RISCVISD::SETCC_VL, DL, MaskVT,
3235       {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3236 
3237   // Truncate to integer and convert back to FP.
3238   MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3239   MVT XLenVT = Subtarget.getXLenVT();
3240   SDValue Truncated;
3241 
3242   switch (Op.getOpcode()) {
3243   default:
3244     llvm_unreachable("Unexpected opcode");
3245   case ISD::STRICT_FCEIL:
3246   case ISD::STRICT_FFLOOR:
3247   case ISD::STRICT_FROUND:
3248   case ISD::STRICT_FROUNDEVEN: {
3249     RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3250     assert(FRM != RISCVFPRndMode::Invalid);
3251     Truncated = DAG.getNode(
3252         RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3253         {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3254     break;
3255   }
3256   case ISD::STRICT_FTRUNC:
3257     Truncated =
3258         DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3259                     DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3260     break;
3261   case ISD::STRICT_FNEARBYINT:
3262     Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3263                             DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3264                             Mask, VL);
3265     break;
3266   }
3267   Chain = Truncated.getValue(1);
3268 
3269   // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3270   if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3271     Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3272                             DAG.getVTList(ContainerVT, MVT::Other), Chain,
3273                             Truncated, Mask, VL);
3274     Chain = Truncated.getValue(1);
3275   }
3276 
3277   // Restore the original sign so that -0.0 is preserved.
3278   Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3279                           Src, Src, Mask, VL);
3280 
3281   if (VT.isFixedLengthVector())
3282     Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3283   return DAG.getMergeValues({Truncated, Chain}, DL);
3284 }
3285 
3286 static SDValue
3287 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3288                                 const RISCVSubtarget &Subtarget) {
3289   MVT VT = Op.getSimpleValueType();
3290   if (VT.isVector())
3291     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3292 
3293   if (DAG.shouldOptForSize())
3294     return SDValue();
3295 
3296   SDLoc DL(Op);
3297   SDValue Src = Op.getOperand(0);
3298 
3299   // Create an integer the size of the mantissa with the MSB set. This and all
3300   // values larger than it don't have any fractional bits so don't need to be
3301   // converted.
3302   const fltSemantics &FltSem = VT.getFltSemantics();
3303   unsigned Precision = APFloat::semanticsPrecision(FltSem);
3304   APFloat MaxVal = APFloat(FltSem);
3305   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3306                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3307   SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3308 
3309   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3310   return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3311                      DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3312 }
3313 
3314 // Expand vector LRINT and LLRINT by converting to the integer domain.
3315 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3316                                 const RISCVSubtarget &Subtarget) {
3317   MVT VT = Op.getSimpleValueType();
3318   assert(VT.isVector() && "Unexpected type");
3319 
3320   SDLoc DL(Op);
3321   SDValue Src = Op.getOperand(0);
3322   MVT ContainerVT = VT;
3323 
3324   if (VT.isFixedLengthVector()) {
3325     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3326     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3327   }
3328 
3329   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3330   SDValue Truncated = DAG.getNode(
3331       RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3332       DAG.getTargetConstant(RISCVFPRndMode::DYN, DL, Subtarget.getXLenVT()),
3333       VL);
3334 
3335   if (!VT.isFixedLengthVector())
3336     return Truncated;
3337 
3338   return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3339 }
3340 
3341 static SDValue
3342 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3343               const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3344               SDValue Offset, SDValue Mask, SDValue VL,
3345               unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3346   if (Passthru.isUndef())
3347     Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3348   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3349   SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3350   return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3351 }
3352 
3353 static SDValue
3354 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3355             EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3356             SDValue VL,
3357             unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3358   if (Passthru.isUndef())
3359     Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3360   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3361   SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3362   return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3363 }
3364 
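// Return the LMUL=1 scalable vector type with the same element type as VT,
// e.g. nxv2i32 for any scalable vector of i32 elements.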
3365 static MVT getLMUL1VT(MVT VT) {
3366   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3367          "Unexpected vector MVT");
3368   return MVT::getScalableVectorVT(
3369       VT.getVectorElementType(),
3370       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3371 }
3372 
3373 struct VIDSequence {
3374   int64_t StepNumerator;
3375   unsigned StepDenominator;
3376   int64_t Addend;
3377 };
3378 
3379 static std::optional<APInt> getExactInteger(const APFloat &APF,
3380                                             uint32_t BitWidth) {
3381   // We will use a SINT_TO_FP to materialize this constant so we should use a
3382   // signed APSInt here.
3383   APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3384   // We use an arbitrary rounding mode here. If a floating-point is an exact
3385   // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3386   // the rounding mode changes the output value, then it is not an exact
3387   // integer.
3388   RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3389   bool IsExact;
3390   // If it is out of signed integer range, it will return an invalid operation.
3391   // If it is not an exact integer, IsExact is false.
3392   if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3393        APFloatBase::opInvalidOp) ||
3394       !IsExact)
3395     return std::nullopt;
3396   return ValInt.extractBits(BitWidth, 0);
3397 }
3398 
3399 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3400 // to the (non-zero) step S and start value X. This can be then lowered as the
3401 // RVV sequence (VID * S) + X, for example.
3402 // The step S is represented as an integer numerator divided by a positive
3403 // denominator. Note that the implementation currently only identifies
3404 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
3405 // cannot detect 2/3, for example.
3406 // Note that this method will also match potentially unappealing index
3407 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3408 // determine whether this is worth generating code for.
3409 //
3410 // EltSizeInBits is the size of the type that the sequence will be calculated
3411 // in, i.e. SEW for build_vectors or XLEN for address calculations.
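// For example, <0,2,4,6> is matched as step 2/1 with addend 0, and
// <1,1,2,2,3,3> is matched as step 1/2 with addend 1.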
3412 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3413                                                       unsigned EltSizeInBits) {
3414   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3415   if (!cast<BuildVectorSDNode>(Op)->isConstant())
3416     return std::nullopt;
3417   bool IsInteger = Op.getValueType().isInteger();
3418 
3419   std::optional<unsigned> SeqStepDenom;
3420   std::optional<APInt> SeqStepNum;
3421   std::optional<APInt> SeqAddend;
3422   std::optional<std::pair<APInt, unsigned>> PrevElt;
3423   assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3424 
3425   // First extract the ops into a list of constant integer values. This may not
3426   // be possible for floats if they're not all representable as integers.
3427   SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3428   const unsigned OpSize = Op.getScalarValueSizeInBits();
3429   for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3430     if (Elt.isUndef()) {
3431       Elts[Idx] = std::nullopt;
3432       continue;
3433     }
3434     if (IsInteger) {
3435       Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3436     } else {
3437       auto ExactInteger =
3438           getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3439       if (!ExactInteger)
3440         return std::nullopt;
3441       Elts[Idx] = *ExactInteger;
3442     }
3443   }
3444 
3445   for (auto [Idx, Elt] : enumerate(Elts)) {
3446     // Assume undef elements match the sequence; we just have to be careful
3447     // when interpolating across them.
3448     if (!Elt)
3449       continue;
3450 
3451     if (PrevElt) {
3452       // Calculate the step since the last non-undef element, and ensure
3453       // it's consistent across the entire sequence.
3454       unsigned IdxDiff = Idx - PrevElt->second;
3455       APInt ValDiff = *Elt - PrevElt->first;
3456 
3457       // A zero value difference means that we're somewhere in the middle
3458       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3459       // step change before evaluating the sequence.
3460       if (ValDiff == 0)
3461         continue;
3462 
3463       int64_t Remainder = ValDiff.srem(IdxDiff);
3464       // Normalize the step if it's greater than 1.
3465       if (Remainder != ValDiff.getSExtValue()) {
3466         // The difference must cleanly divide the element span.
3467         if (Remainder != 0)
3468           return std::nullopt;
3469         ValDiff = ValDiff.sdiv(IdxDiff);
3470         IdxDiff = 1;
3471       }
3472 
3473       if (!SeqStepNum)
3474         SeqStepNum = ValDiff;
3475       else if (ValDiff != SeqStepNum)
3476         return std::nullopt;
3477 
3478       if (!SeqStepDenom)
3479         SeqStepDenom = IdxDiff;
3480       else if (IdxDiff != *SeqStepDenom)
3481         return std::nullopt;
3482     }
3483 
3484     // Record this non-undef element for later.
3485     if (!PrevElt || PrevElt->first != *Elt)
3486       PrevElt = std::make_pair(*Elt, Idx);
3487   }
3488 
3489   // We need to have logged a step for this to count as a legal index sequence.
3490   if (!SeqStepNum || !SeqStepDenom)
3491     return std::nullopt;
3492 
3493   // Loop back through the sequence and validate elements we might have skipped
3494   // while waiting for a valid step. While doing this, log any sequence addend.
3495   for (auto [Idx, Elt] : enumerate(Elts)) {
3496     if (!Elt)
3497       continue;
3498     APInt ExpectedVal =
3499         (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3500          *SeqStepNum)
3501             .sdiv(*SeqStepDenom);
3502 
3503     APInt Addend = *Elt - ExpectedVal;
3504     if (!SeqAddend)
3505       SeqAddend = Addend;
3506     else if (Addend != SeqAddend)
3507       return std::nullopt;
3508   }
3509 
3510   assert(SeqAddend && "Must have an addend if we have a step");
3511 
3512   return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3513                      SeqAddend->getSExtValue()};
3514 }
3515 
3516 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3517 // and lower it as a VRGATHER_VX_VL from the source vector.
3518 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3519                                   SelectionDAG &DAG,
3520                                   const RISCVSubtarget &Subtarget) {
3521   if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3522     return SDValue();
3523   SDValue Src = SplatVal.getOperand(0);
3524   // Don't perform this optimization for i1 vectors, or if the element types are
3525   // different.
3526   // FIXME: Support i1 vectors, maybe by promoting to i8?
3527   MVT EltTy = VT.getVectorElementType();
3528   MVT SrcVT = Src.getSimpleValueType();
3529   if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
3530     return SDValue();
3531   SDValue Idx = SplatVal.getOperand(1);
3532   // The index must be a legal type.
3533   if (Idx.getValueType() != Subtarget.getXLenVT())
3534     return SDValue();
3535 
3536   // Check that we know Idx lies within VT
3537   if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3538     auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3539     if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3540       return SDValue();
3541   }
3542 
3543   // Convert fixed length vectors to scalable
3544   MVT ContainerVT = VT;
3545   if (VT.isFixedLengthVector())
3546     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3547 
3548   MVT SrcContainerVT = SrcVT;
3549   if (SrcVT.isFixedLengthVector()) {
3550     SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3551     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3552   }
3553 
3554   // Put Vec in a VT sized vector
3555   if (SrcContainerVT.getVectorMinNumElements() <
3556       ContainerVT.getVectorMinNumElements())
3557     Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3558                       DAG.getUNDEF(ContainerVT), Src,
3559                       DAG.getVectorIdxConstant(0, DL));
3560   else
3561     Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Src,
3562                       DAG.getVectorIdxConstant(0, DL));
3563 
3564   // We checked that Idx fits inside VT earlier
3565   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3566   SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3567                                Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3568   if (VT.isFixedLengthVector())
3569     Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3570   return Gather;
3571 }
3572 
3573 /// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3574 /// which constitute a large proportion of the elements. In such cases we can
3575 /// splat a vector with the dominant element and make up the shortfall with
3576 /// INSERT_VECTOR_ELTs.  Returns SDValue if not profitable.
3577 /// Note that this includes vectors of 2 elements by association. The
3578 /// upper-most element is the "dominant" one, allowing us to use a splat to
3579 /// "insert" the upper element, and an insert of the lower element at position
3580 /// 0, which improves codegen.
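/// For example, <2,2,2,2,3,2,2,2> can be lowered as a splat of 2 followed by a
/// single INSERT_VECTOR_ELT of 3 at index 4.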
3581 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3582                                                  const RISCVSubtarget &Subtarget) {
3583   MVT VT = Op.getSimpleValueType();
3584   assert(VT.isFixedLengthVector() && "Unexpected vector!");
3585 
3586   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3587 
3588   SDLoc DL(Op);
3589   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3590 
3591   MVT XLenVT = Subtarget.getXLenVT();
3592   unsigned NumElts = Op.getNumOperands();
3593 
3594   SDValue DominantValue;
3595   unsigned MostCommonCount = 0;
3596   DenseMap<SDValue, unsigned> ValueCounts;
3597   unsigned NumUndefElts =
3598       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3599 
3600   // Track the number of scalar loads we know we'd be inserting, estimated as
3601   // any non-zero floating-point constant. Other kinds of element are either
3602   // already in registers or are materialized on demand. The threshold at which
3603   // a vector load is more desirable than several scalar materialization and
3604   // vector-insertion instructions is not known.
3605   unsigned NumScalarLoads = 0;
3606 
3607   for (SDValue V : Op->op_values()) {
3608     if (V.isUndef())
3609       continue;
3610 
3611     unsigned &Count = ValueCounts[V];
3612     if (0 == Count)
3613       if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3614         NumScalarLoads += !CFP->isExactlyValue(+0.0);
3615 
3616     // Is this value dominant? In case of a tie, prefer the highest element as
3617     // it's cheaper to insert near the beginning of a vector than it is at the
3618     // end.
3619     if (++Count >= MostCommonCount) {
3620       DominantValue = V;
3621       MostCommonCount = Count;
3622     }
3623   }
3624 
3625   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3626   unsigned NumDefElts = NumElts - NumUndefElts;
3627   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3628 
3629   // Don't perform this optimization when optimizing for size, since
3630   // materializing elements and inserting them tends to cause code bloat.
3631   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3632       (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3633       ((MostCommonCount > DominantValueCountThreshold) ||
3634        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3635     // Start by splatting the most common element.
3636     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3637 
3638     DenseSet<SDValue> Processed{DominantValue};
3639 
3640     // We can handle an insert into the last element (of a splat) via
3641     // v(f)slide1down.  This is slightly better than the vslideup insert
3642     // lowering as it avoids the need for a vector group temporary.  It
3643     // is also better than using vmerge.vx as it avoids the need to
3644     // materialize the mask in a vector register.
3645     if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3646         !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3647         LastOp != DominantValue) {
3648       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3649       auto OpCode =
3650         VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3651       if (!VT.isFloatingPoint())
3652         LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3653       Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3654                         LastOp, Mask, VL);
3655       Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3656       Processed.insert(LastOp);
3657     }
3658 
3659     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3660     for (const auto &OpIdx : enumerate(Op->ops())) {
3661       const SDValue &V = OpIdx.value();
3662       if (V.isUndef() || !Processed.insert(V).second)
3663         continue;
3664       if (ValueCounts[V] == 1) {
3665         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3666                           DAG.getVectorIdxConstant(OpIdx.index(), DL));
3667       } else {
3668         // Blend in all instances of this value using a VSELECT, using a
3669         // mask where each bit signals whether that element is the one
3670         // we're after.
3671         SmallVector<SDValue> Ops;
3672         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3673           return DAG.getConstant(V == V1, DL, XLenVT);
3674         });
3675         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3676                           DAG.getBuildVector(SelMaskTy, DL, Ops),
3677                           DAG.getSplatBuildVector(VT, DL, V), Vec);
3678       }
3679     }
3680 
3681     return Vec;
3682   }
3683 
3684   return SDValue();
3685 }
3686 
3687 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3688                                            const RISCVSubtarget &Subtarget) {
3689   MVT VT = Op.getSimpleValueType();
3690   assert(VT.isFixedLengthVector() && "Unexpected vector!");
3691 
3692   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3693 
3694   SDLoc DL(Op);
3695   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3696 
3697   MVT XLenVT = Subtarget.getXLenVT();
3698   unsigned NumElts = Op.getNumOperands();
3699 
3700   if (VT.getVectorElementType() == MVT::i1) {
3701     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3702       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3703       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3704     }
3705 
3706     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3707       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3708       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3709     }
3710 
3711     // Lower constant mask BUILD_VECTORs via an integer vector type, in
3712     // scalar integer chunks whose bit-width depends on the number of mask
3713     // bits and XLEN.
3714     // First, determine the most appropriate scalar integer type to use. This
3715     // is at most XLenVT, but may be shrunk to a smaller vector element type
3716     // according to the size of the final vector - use i8 chunks rather than
3717     // XLenVT if we're producing a v8i1. This results in more consistent
3718     // codegen across RV32 and RV64.
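    // For example, with ELEN=64 a v16i1 mask is built via a single i16 chunk
    // on both RV32 and RV64, while a v64i1 mask uses two i32 chunks on RV32
    // and one i64 chunk on RV64.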
3719     unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3720     NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3721     // If we have to use more than one INSERT_VECTOR_ELT then this
3722     // optimization is likely to increase code size; avoid performing it in
3723     // such a case. We can use a load from a constant pool in this case.
3724     if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3725       return SDValue();
3726     // Now we can create our integer vector type. Note that it may be larger
3727     // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3728     unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3729     MVT IntegerViaVecVT =
3730       MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3731                        IntegerViaVecElts);
3732 
3733     uint64_t Bits = 0;
3734     unsigned BitPos = 0, IntegerEltIdx = 0;
3735     SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3736 
3737     for (unsigned I = 0; I < NumElts;) {
3738       SDValue V = Op.getOperand(I);
3739       bool BitValue = !V.isUndef() && V->getAsZExtVal();
3740       Bits |= ((uint64_t)BitValue << BitPos);
3741       ++BitPos;
3742       ++I;
3743 
3744       // Once we accumulate enough bits to fill our scalar type or process the
3745       // last element, insert into our vector and clear our accumulated data.
3746       if (I % NumViaIntegerBits == 0 || I == NumElts) {
3747         if (NumViaIntegerBits <= 32)
3748           Bits = SignExtend64<32>(Bits);
3749         SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3750         Elts[IntegerEltIdx] = Elt;
3751         Bits = 0;
3752         BitPos = 0;
3753         IntegerEltIdx++;
3754       }
3755     }
3756 
3757     SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3758 
3759     if (NumElts < NumViaIntegerBits) {
3760       // If we're producing a smaller vector than our minimum legal integer
3761       // type, bitcast to the equivalent (known-legal) mask type, and extract
3762       // our final mask.
3763       assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3764       Vec = DAG.getBitcast(MVT::v8i1, Vec);
3765       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3766                         DAG.getConstant(0, DL, XLenVT));
3767     } else {
3768       // Else we must have produced an integer type with the same size as the
3769       // mask type; bitcast for the final result.
3770       assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3771       Vec = DAG.getBitcast(VT, Vec);
3772     }
3773 
3774     return Vec;
3775   }
3776 
3777   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3778     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3779                                         : RISCVISD::VMV_V_X_VL;
3780     if (!VT.isFloatingPoint())
3781       Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3782     Splat =
3783         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3784     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3785   }
3786 
3787   // Try and match index sequences, which we can lower to the vid instruction
3788   // with optional modifications. An all-undef vector is matched by
3789   // getSplatValue, above.
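  // A few illustrative examples of sequences this can match (shown roughly at
  // the instruction level):
  //   <0, 1, 2, 3>  ->  vid.v
  //   <3, 4, 5, 6>  ->  vid.v ; vadd.vi 3   (Addend = 3)
  //   <0, 2, 4, 6>  ->  vid.v ; vsll.vi 1   (StepNumerator = 2)
  //   <0, 0, 1, 1>  ->  vid.v ; vsrl.vi 1   (StepDenominator = 2)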
3790   if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3791     int64_t StepNumerator = SimpleVID->StepNumerator;
3792     unsigned StepDenominator = SimpleVID->StepDenominator;
3793     int64_t Addend = SimpleVID->Addend;
3794 
3795     assert(StepNumerator != 0 && "Invalid step");
3796     bool Negate = false;
3797     int64_t SplatStepVal = StepNumerator;
3798     unsigned StepOpcode = ISD::MUL;
3799     // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3800     // anyway as the shift of 63 won't fit in uimm5.
3801     if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3802         isPowerOf2_64(std::abs(StepNumerator))) {
3803       Negate = StepNumerator < 0;
3804       StepOpcode = ISD::SHL;
3805       SplatStepVal = Log2_64(std::abs(StepNumerator));
3806     }
3807 
    // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
    // threshold since it's the immediate range many RVV instructions accept.
    // There is no vmul.vi instruction, so ensure the multiply constant can
    // fit in a single addi instruction.
3812     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3813          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3814         isPowerOf2_32(StepDenominator) &&
3815         (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3816       MVT VIDVT =
3817           VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3818       MVT VIDContainerVT =
3819           getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3820       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3821       // Convert right out of the scalable type so we can use standard ISD
3822       // nodes for the rest of the computation. If we used scalable types with
3823       // these, we'd lose the fixed-length vector info and generate worse
3824       // vsetvli code.
3825       VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3826       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3827           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3828         SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3829         VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3830       }
3831       if (StepDenominator != 1) {
3832         SDValue SplatStep =
3833             DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3834         VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3835       }
3836       if (Addend != 0 || Negate) {
3837         SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3838         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3839                           VID);
3840       }
3841       if (VT.isFloatingPoint()) {
3842         // TODO: Use vfwcvt to reduce register pressure.
3843         VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3844       }
3845       return VID;
3846     }
3847   }
3848 
3849   // For very small build_vectors, use a single scalar insert of a constant.
3850   // TODO: Base this on constant rematerialization cost, not size.
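  // For example (illustrative): a v4i8 constant <1, 2, 3, 4> is amalgamated
  // into the single i32 value 0x04030201, inserted into element 0 of an i32
  // vector, and the result is bitcast back to v4i8.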
3851   const unsigned EltBitSize = VT.getScalarSizeInBits();
3852   if (VT.getSizeInBits() <= 32 &&
3853       ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3854     MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3855     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3856            "Unexpected sequence type");
3857     // If we can use the original VL with the modified element type, this
3858     // means we only have a VTYPE toggle, not a VL toggle.  TODO: Should this
3859     // be moved into InsertVSETVLI?
3860     unsigned ViaVecLen =
3861       (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3862     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3863 
3864     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3865     uint64_t SplatValue = 0;
3866     // Construct the amalgamated value at this larger vector type.
3867     for (const auto &OpIdx : enumerate(Op->op_values())) {
3868       const auto &SeqV = OpIdx.value();
3869       if (!SeqV.isUndef())
3870         SplatValue |=
3871             ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3872     }
3873 
3874     // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
3876     // On RV32, we need to sign-extend to use getSignedConstant.
3877     if (ViaIntVT == MVT::i32)
3878       SplatValue = SignExtend64<32>(SplatValue);
3879 
3880     SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3881                               DAG.getUNDEF(ViaVecVT),
3882                               DAG.getSignedConstant(SplatValue, DL, XLenVT),
3883                               DAG.getVectorIdxConstant(0, DL));
3884     if (ViaVecLen != 1)
3885       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3886                         MVT::getVectorVT(ViaIntVT, 1), Vec,
3887                         DAG.getConstant(0, DL, XLenVT));
3888     return DAG.getBitcast(VT, Vec);
3889   }
3890 
3891 
3892   // Attempt to detect "hidden" splats, which only reveal themselves as splats
3893   // when re-interpreted as a vector with a larger element type. For example,
3894   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3895   // could be instead splat as
3896   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
3897   // TODO: This optimization could also work on non-constant splats, but it
3898   // would require bit-manipulation instructions to construct the splat value.
3899   SmallVector<SDValue> Sequence;
3900   const auto *BV = cast<BuildVectorSDNode>(Op);
3901   if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3902       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3903       BV->getRepeatedSequence(Sequence) &&
3904       (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3905     unsigned SeqLen = Sequence.size();
3906     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3907     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3908             ViaIntVT == MVT::i64) &&
3909            "Unexpected sequence type");
3910 
3911     // If we can use the original VL with the modified element type, this
3912     // means we only have a VTYPE toggle, not a VL toggle.  TODO: Should this
3913     // be moved into InsertVSETVLI?
3914     const unsigned RequiredVL = NumElts / SeqLen;
3915     const unsigned ViaVecLen =
3916       (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3917       NumElts : RequiredVL;
3918     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3919 
3920     unsigned EltIdx = 0;
3921     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3922     uint64_t SplatValue = 0;
3923     // Construct the amalgamated value which can be splatted as this larger
3924     // vector type.
3925     for (const auto &SeqV : Sequence) {
3926       if (!SeqV.isUndef())
3927         SplatValue |=
3928             ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3929       EltIdx++;
3930     }
3931 
3932     // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
3934     // On RV32, we need to sign-extend to use getSignedConstant.
3935     if (ViaIntVT == MVT::i32)
3936       SplatValue = SignExtend64<32>(SplatValue);
3937 
3938     // Since we can't introduce illegal i64 types at this stage, we can only
3939     // perform an i64 splat on RV32 if it is its own sign-extended value. That
3940     // way we can use RVV instructions to splat.
3941     assert((ViaIntVT.bitsLE(XLenVT) ||
3942             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3943            "Unexpected bitcast sequence");
3944     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3945       SDValue ViaVL =
3946           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3947       MVT ViaContainerVT =
3948           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3949       SDValue Splat =
3950           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3951                       DAG.getUNDEF(ViaContainerVT),
3952                       DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3953       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3954       if (ViaVecLen != RequiredVL)
3955         Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3956                             MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3957                             DAG.getConstant(0, DL, XLenVT));
3958       return DAG.getBitcast(VT, Splat);
3959     }
3960   }
3961 
3962   // If the number of signbits allows, see if we can lower as a <N x i8>.
3963   // Our main goal here is to reduce LMUL (and thus work) required to
3964   // build the constant, but we will also narrow if the resulting
3965   // narrow vector is known to materialize cheaply.
3966   // TODO: We really should be costing the smaller vector.  There are
3967   // profitable cases this misses.
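  // Illustrative example: a v4i32 constant such as <1, -2, 3, 4> has at most
  // 8 significant (sign) bits per element, so it can be built as a v4i8
  // build_vector and sign-extended back up with vsext, reducing the LMUL
  // (and work) needed to materialize the constant.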
3968   if (EltBitSize > 8 && VT.isInteger() &&
3969       (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3970       DAG.ComputeMaxSignificantBits(Op) <= 8) {
3971     SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3972                                         DL, Op->ops());
    Source = convertToScalableVector(
        ContainerVT.changeVectorElementType(MVT::i8), Source, DAG, Subtarget);
    SDValue Res =
        DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3976     return convertFromScalableVector(VT, Res, DAG, Subtarget);
3977   }
3978 
3979   if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3980     return Res;
3981 
3982   // For constant vectors, use generic constant pool lowering.  Otherwise,
3983   // we'd have to materialize constants in GPRs just to move them into the
3984   // vector.
3985   return SDValue();
3986 }
3987 
3988 static unsigned getPACKOpcode(unsigned DestBW,
3989                               const RISCVSubtarget &Subtarget) {
3990   switch (DestBW) {
3991   default:
3992     llvm_unreachable("Unsupported pack size");
3993   case 16:
3994     return RISCV::PACKH;
3995   case 32:
3996     return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3997   case 64:
3998     assert(Subtarget.is64Bit());
3999     return RISCV::PACK;
4000   }
4001 }
4002 
4003 /// Double the element size of the build vector to reduce the number
4004 /// of vslide1down in the build vector chain.  In the worst case, this
4005 /// trades three scalar operations for 1 vector operation.  Scalar
4006 /// operations are generally lower latency, and for out-of-order cores
4007 /// we also benefit from additional parallelism.
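/// As an illustrative sketch: a v8i8 build_vector of scalars a..h can instead
/// be built as a v4i16 build_vector of packed pairs ((b << 8) | a), ...,
/// ((h << 8) | g), halving the number of vslide1down steps at the cost of the
/// scalar packing instructions (packh with Zbkb, otherwise and/shl/or).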
4008 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4009                                           const RISCVSubtarget &Subtarget) {
4010   SDLoc DL(Op);
4011   MVT VT = Op.getSimpleValueType();
4012   assert(VT.isFixedLengthVector() && "Unexpected vector!");
4013   MVT ElemVT = VT.getVectorElementType();
4014   if (!ElemVT.isInteger())
4015     return SDValue();
4016 
4017   // TODO: Relax these architectural restrictions, possibly with costing
4018   // of the actual instructions required.
4019   if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4020     return SDValue();
4021 
4022   unsigned NumElts = VT.getVectorNumElements();
4023   unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4024   if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4025       NumElts % 2 != 0)
4026     return SDValue();
4027 
4028   // Produce [B,A] packed into a type twice as wide.  Note that all
4029   // scalars are XLenVT, possibly masked (see below).
4030   MVT XLenVT = Subtarget.getXLenVT();
4031   SDValue Mask = DAG.getConstant(
4032       APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4033   auto pack = [&](SDValue A, SDValue B) {
4034     // Bias the scheduling of the inserted operations to near the
4035     // definition of the element - this tends to reduce register
4036     // pressure overall.
4037     SDLoc ElemDL(B);
4038     if (Subtarget.hasStdExtZbkb())
4039       // Note that we're relying on the high bits of the result being
4040       // don't care.  For PACKW, the result is *sign* extended.
4041       return SDValue(
4042           DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4043                              ElemDL, XLenVT, A, B),
4044           0);
4045 
4046     A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4047     B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4048     SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4049     return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4050                        DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4051                        SDNodeFlags::Disjoint);
4052   };
4053 
4054   SmallVector<SDValue> NewOperands;
4055   NewOperands.reserve(NumElts / 2);
4056   for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4057     NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4058   assert(NumElts == NewOperands.size() * 2);
4059   MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4060   MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4061   return DAG.getNode(ISD::BITCAST, DL, VT,
4062                      DAG.getBuildVector(WideVecVT, DL, NewOperands));
4063 }
4064 
4065 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4066                                  const RISCVSubtarget &Subtarget) {
4067   MVT VT = Op.getSimpleValueType();
4068   assert(VT.isFixedLengthVector() && "Unexpected vector!");
4069 
4070   MVT EltVT = VT.getVectorElementType();
4071   MVT XLenVT = Subtarget.getXLenVT();
4072 
4073   SDLoc DL(Op);
4074 
4075   // Proper support for f16 requires Zvfh. bf16 always requires special
4076   // handling. We need to cast the scalar to integer and create an integer
4077   // build_vector.
4078   if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4079     MVT IVT = VT.changeVectorElementType(MVT::i16);
4080     SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4081     for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4082       SDValue Elem = Op.getOperand(I);
4083       if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4084           (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4085         // Called by LegalizeDAG, we need to use XLenVT operations since we
4086         // can't create illegal types.
4087         if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4088           // Manually constant fold so the integer build_vector can be lowered
4089           // better. Waiting for DAGCombine will be too late.
4090           APInt V =
4091               C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4092           NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4093         } else {
4094           NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4095         }
4096       } else {
4097         // Called by scalar type legalizer, we can use i16.
4098         NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4099       }
4100     }
4101     SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4102     return DAG.getBitcast(VT, Res);
4103   }
4104 
4105   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4106       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4107     return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4108 
4109   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4110 
4111   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4112 
4113   if (VT.getVectorElementType() == MVT::i1) {
4114     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4115     // vector type, we have a legal equivalently-sized i8 type, so we can use
4116     // that.
4117     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4118     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4119 
4120     SDValue WideVec;
4121     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4122       // For a splat, perform a scalar truncate before creating the wider
4123       // vector.
4124       Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4125                           DAG.getConstant(1, DL, Splat.getValueType()));
4126       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4127     } else {
4128       SmallVector<SDValue, 8> Ops(Op->op_values());
4129       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4130       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4131       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4132     }
4133 
4134     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4135   }
4136 
4137   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4138     if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4139       return Gather;
4140     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4141                                         : RISCVISD::VMV_V_X_VL;
4142     if (!VT.isFloatingPoint())
4143       Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4144     Splat =
4145         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4146     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4147   }
4148 
4149   if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4150     return Res;
4151 
4152   // If we're compiling for an exact VLEN value, we can split our work per
4153   // register in the register group.
4154   if (const auto VLen = Subtarget.getRealVLen();
4155       VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4156     MVT ElemVT = VT.getVectorElementType();
4157     unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4158     EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4159     MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4160     MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4161     assert(M1VT == getLMUL1VT(M1VT));
4162 
4163     // The following semantically builds up a fixed length concat_vector
4164     // of the component build_vectors.  We eagerly lower to scalable and
4165     // insert_subvector here to avoid DAG combining it back to a large
4166     // build_vector.
4167     SmallVector<SDValue> BuildVectorOps(Op->ops());
4168     unsigned NumOpElts = M1VT.getVectorMinNumElements();
4169     SDValue Vec = DAG.getUNDEF(ContainerVT);
4170     for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4171       auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4172       SDValue SubBV =
4173           DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4174       SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4175       unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4176       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4177                         DAG.getVectorIdxConstant(InsertIdx, DL));
4178     }
4179     return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4180   }
4181 
4182   // If we're about to resort to vslide1down (or stack usage), pack our
4183   // elements into the widest scalar type we can.  This will force a VL/VTYPE
4184   // toggle, but reduces the critical path, the number of vslide1down ops
4185   // required, and possibly enables scalar folds of the values.
4186   if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4187     return Res;
4188 
4189   // For m1 vectors, if we have non-undef values in both halves of our vector,
4190   // split the vector into low and high halves, build them separately, then
4191   // use a vselect to combine them.  For long vectors, this cuts the critical
4192   // path of the vslide1down sequence in half, and gives us an opportunity
4193   // to special case each half independently.  Note that we don't change the
  // length of the sub-vectors here, so if both halves fall back to the
  // generic vslide1down path, we should be able to fold the vselect into the
  // final vslidedown (for the undef tail) for the first half w/ masking.
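  // As an illustrative example, a fully-defined v8i16 build_vector (assuming
  // it fits within a single register, i.e. at most LMUL=1) becomes
  //   vselect <1,1,1,1,0,0,0,0>,
  //           build_vector(e0..e3, undef x 4),
  //           build_vector(undef x 4, e4..e7)
  // so that each half can be built (and optimized) independently.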
4197   unsigned NumElts = VT.getVectorNumElements();
4198   unsigned NumUndefElts =
4199       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4200   unsigned NumDefElts = NumElts - NumUndefElts;
4201   if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4202       ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4203     SmallVector<SDValue> SubVecAOps, SubVecBOps;
4204     SmallVector<SDValue> MaskVals;
4205     SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4206     SubVecAOps.reserve(NumElts);
4207     SubVecBOps.reserve(NumElts);
4208     for (unsigned i = 0; i < NumElts; i++) {
4209       SDValue Elem = Op->getOperand(i);
4210       if (i < NumElts / 2) {
4211         SubVecAOps.push_back(Elem);
4212         SubVecBOps.push_back(UndefElem);
4213       } else {
4214         SubVecAOps.push_back(UndefElem);
4215         SubVecBOps.push_back(Elem);
4216       }
4217       bool SelectMaskVal = (i < NumElts / 2);
4218       MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4219     }
4220     assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4221            MaskVals.size() == NumElts);
4222 
4223     SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4224     SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4225     MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4226     SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4227     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4228   }
4229 
  // Cap the cost at a value linear in the number of elements in the vector.
  // The default lowering is to use the stack.  The vector store + scalar loads
  // is linear in VL.  However, at high LMULs, vslide1down and vslidedown end
  // up being (at least) linear in LMUL.  As a result, using the vslidedown
  // lowering for every element ends up costing VL*LMUL.
4235   // TODO: Should we be directly costing the stack alternative?  Doing so might
4236   // give us a more accurate upper bound.
4237   InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4238 
4239   // TODO: unify with TTI getSlideCost.
4240   InstructionCost PerSlideCost = 1;
4241   switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4242   default: break;
4243   case RISCVII::VLMUL::LMUL_2:
4244     PerSlideCost = 2;
4245     break;
4246   case RISCVII::VLMUL::LMUL_4:
4247     PerSlideCost = 4;
4248     break;
4249   case RISCVII::VLMUL::LMUL_8:
4250     PerSlideCost = 8;
4251     break;
4252   }
4253 
4254   // TODO: Should we be using the build instseq then cost + evaluate scheme
4255   // we use for integer constants here?
4256   unsigned UndefCount = 0;
4257   for (const SDValue &V : Op->ops()) {
4258     if (V.isUndef()) {
4259       UndefCount++;
4260       continue;
4261     }
4262     if (UndefCount) {
4263       LinearBudget -= PerSlideCost;
4264       UndefCount = 0;
4265     }
4266     LinearBudget -= PerSlideCost;
4267   }
4268   if (UndefCount) {
4269     LinearBudget -= PerSlideCost;
4270   }
4271 
4272   if (LinearBudget < 0)
4273     return SDValue();
4274 
4275   assert((!VT.isFloatingPoint() ||
4276           VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4277          "Illegal type which will result in reserved encoding");
4278 
4279   const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4280 
4281   SDValue Vec;
4282   UndefCount = 0;
4283   for (SDValue V : Op->ops()) {
4284     if (V.isUndef()) {
4285       UndefCount++;
4286       continue;
4287     }
4288 
4289     // Start our sequence with a TA splat in the hopes that hardware is able to
4290     // recognize there's no dependency on the prior value of our temporary
4291     // register.
4292     if (!Vec) {
4293       Vec = DAG.getSplatVector(VT, DL, V);
4294       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4295       UndefCount = 0;
4296       continue;
4297     }
4298 
4299     if (UndefCount) {
      const SDValue Offset =
          DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
      Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, VL,
                          Policy);
4303       UndefCount = 0;
4304     }
    auto OpCode = VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL
                                       : RISCVISD::VSLIDE1DOWN_VL;
4307     if (!VT.isFloatingPoint())
4308       V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4309     Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4310                       V, Mask, VL);
4311   }
4312   if (UndefCount) {
    const SDValue Offset =
        DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, VL,
                        Policy);
4316   }
4317   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4318 }
4319 
4320 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4321                                    SDValue Lo, SDValue Hi, SDValue VL,
4322                                    SelectionDAG &DAG) {
4323   if (!Passthru)
4324     Passthru = DAG.getUNDEF(VT);
4325   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4326     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4327     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4328     // If Hi constant is all the same sign bit as Lo, lower this as a custom
4329     // node in order to try and match RVV vector/scalar instructions.
4330     if ((LoC >> 31) == HiC)
4331       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4332 
4333     // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4334     // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4335     // vlmax vsetvli or vsetivli to change the VL.
4336     // FIXME: Support larger constants?
4337     // FIXME: Support non-constant VLs by saturating?
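    // Illustrative example (assumes RV32, where this path is reached): for an
    // i64 splat of 0x0000000500000005 (Lo == Hi == 5) with VL = 2, we can
    // instead emit a vmv.v.x of 5 with EEW = 32 and VL = 4, then bitcast the
    // result back to the original i64 vector type.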
4338     if (LoC == HiC) {
4339       SDValue NewVL;
4340       if (isAllOnesConstant(VL) ||
4341           (isa<RegisterSDNode>(VL) &&
4342            cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4343         NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4344       else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4345         NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4346 
4347       if (NewVL) {
4348         MVT InterVT =
4349             MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4350         auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4351                                     DAG.getUNDEF(InterVT), Lo, NewVL);
4352         return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4353       }
4354     }
4355   }
4356 
4357   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4358   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4359       isa<ConstantSDNode>(Hi.getOperand(1)) &&
4360       Hi.getConstantOperandVal(1) == 31)
4361     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4362 
4363   // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4364   // even if it might be sign extended.
4365   if (Hi.isUndef())
4366     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4367 
4368   // Fall back to a stack store and stride x0 vector load.
4369   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4370                      Hi, VL);
4371 }
4372 
4373 // Called by type legalization to handle splat of i64 on RV32.
4374 // FIXME: We can optimize this when the type has sign or zero bits in one
4375 // of the halves.
4376 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4377                                    SDValue Scalar, SDValue VL,
4378                                    SelectionDAG &DAG) {
4379   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4380   SDValue Lo, Hi;
4381   std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4382   return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4383 }
4384 
// This function lowers a splat of a scalar operand Scalar with the vector
4386 // length VL. It ensures the final sequence is type legal, which is useful when
4387 // lowering a splat after type legalization.
4388 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4389                                 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4390                                 const RISCVSubtarget &Subtarget) {
4391   bool HasPassthru = Passthru && !Passthru.isUndef();
4392   if (!HasPassthru && !Passthru)
4393     Passthru = DAG.getUNDEF(VT);
4394 
4395   MVT EltVT = VT.getVectorElementType();
4396   MVT XLenVT = Subtarget.getXLenVT();
4397 
4398   if (VT.isFloatingPoint()) {
4399     if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4400         EltVT == MVT::bf16) {
4401       if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4402           (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4403         Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4404       else
4405         Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4406       MVT IVT = VT.changeVectorElementType(MVT::i16);
4407       Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4408       SDValue Splat =
4409           lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4410       return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4411     }
4412     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4413   }
4414 
4415   // Simplest case is that the operand needs to be promoted to XLenVT.
4416   if (Scalar.getValueType().bitsLE(XLenVT)) {
4417     // If the operand is a constant, sign extend to increase our chances
4418     // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
4420     // FIXME: Should we ignore the upper bits in isel instead?
4421     unsigned ExtOpc =
4422         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4423     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4424     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4425   }
4426 
4427   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4428          "Unexpected scalar for splat lowering!");
4429 
4430   if (isOneConstant(VL) && isNullConstant(Scalar))
4431     return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4432                        DAG.getConstant(0, DL, XLenVT), VL);
4433 
4434   // Otherwise use the more complicated splatting algorithm.
4435   return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4436 }
4437 
4438 // This function lowers an insert of a scalar operand Scalar into lane
4439 // 0 of the vector regardless of the value of VL.  The contents of the
4440 // remaining lanes of the result vector are unspecified.  VL is assumed
4441 // to be non-zero.
4442 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4443                                  const SDLoc &DL, SelectionDAG &DAG,
4444                                  const RISCVSubtarget &Subtarget) {
4445   assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4446 
4447   const MVT XLenVT = Subtarget.getXLenVT();
4448   SDValue Passthru = DAG.getUNDEF(VT);
4449 
4450   if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4451       isNullConstant(Scalar.getOperand(1))) {
4452     SDValue ExtractedVal = Scalar.getOperand(0);
4453     // The element types must be the same.
4454     if (ExtractedVal.getValueType().getVectorElementType() ==
4455         VT.getVectorElementType()) {
4456       MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4457       MVT ExtractedContainerVT = ExtractedVT;
4458       if (ExtractedContainerVT.isFixedLengthVector()) {
4459         ExtractedContainerVT = getContainerForFixedLengthVector(
4460             DAG, ExtractedContainerVT, Subtarget);
4461         ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4462                                                ExtractedVal, DAG, Subtarget);
4463       }
4464       if (ExtractedContainerVT.bitsLE(VT))
4465         return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4466                            ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4467       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4468                          DAG.getVectorIdxConstant(0, DL));
4469     }
4470   }
4471 
4472 
4473   if (VT.isFloatingPoint())
4474     return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4475                        DAG.getUNDEF(VT), Scalar, VL);
4476 
4477   // Avoid the tricky legalization cases by falling back to using the
4478   // splat code which already handles it gracefully.
4479   if (!Scalar.getValueType().bitsLE(XLenVT))
4480     return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4481                             DAG.getConstant(1, DL, XLenVT),
4482                             VT, DL, DAG, Subtarget);
4483 
4484   // If the operand is a constant, sign extend to increase our chances
4485   // of being able to use a .vi instruction. ANY_EXTEND would become a
  // zero extend and the simm5 check in isel would fail.
4487   // FIXME: Should we ignore the upper bits in isel instead?
4488   unsigned ExtOpc =
4489       isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4490   Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4491   return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4492                      VL);
4493 }
4494 
4495 // Can this shuffle be performed on exactly one (possibly larger) input?
4496 static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4497                                    SDValue V2) {
4498 
4499   if (V2.isUndef() &&
4500       RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4501     return V1;
4502 
  // Both inputs must be extracts.
4504   if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4505       V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4506     return SDValue();
4507 
4508   // Extracting from the same source.
4509   SDValue Src = V1.getOperand(0);
4510   if (Src != V2.getOperand(0))
4511     return SDValue();
4512 
4513   // Src needs to have twice the number of elements.
4514   unsigned NumElts = VT.getVectorNumElements();
4515   if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4516     return SDValue();
4517 
4518   // The extracts must extract the two halves of the source.
4519   if (V1.getConstantOperandVal(1) != 0 ||
4520       V2.getConstantOperandVal(1) != NumElts)
4521     return SDValue();
4522 
4523   return Src;
4524 }
4525 
4526 /// Is this shuffle interleaving contiguous elements from one vector into the
4527 /// even elements and contiguous elements from another vector into the odd
4528 /// elements. \p EvenSrc will contain the element that should be in the first
4529 /// even element. \p OddSrc will contain the element that should be in the first
4530 /// odd element. These can be the first element in a source or the element half
4531 /// way through the source.
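/// Illustrative examples for a v8i8 shuffle, where mask indices 0-7 refer to
/// the first source and 8-15 to the second:
///   <0, 8, 1, 9, 2, 10, 3, 11>  ->  EvenSrc = 0, OddSrc = 8
///   <0, 4, 1, 5, 2, 6, 3, 7>    ->  EvenSrc = 0, OddSrc = 4 (unary case)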
4532 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4533                                 int &OddSrc, const RISCVSubtarget &Subtarget) {
4534   // We need to be able to widen elements to the next larger integer type.
4535   if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4536     return false;
4537 
4538   int Size = Mask.size();
4539   int NumElts = VT.getVectorNumElements();
4540   assert(Size == (int)NumElts && "Unexpected mask size");
4541 
4542   SmallVector<unsigned, 2> StartIndexes;
4543   if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4544     return false;
4545 
4546   EvenSrc = StartIndexes[0];
4547   OddSrc = StartIndexes[1];
4548 
4549   // One source should be low half of first vector.
4550   if (EvenSrc != 0 && OddSrc != 0)
4551     return false;
4552 
  // Subvectors will be extracted either at the start of the two input
  // vectors, or at the start and middle of the first vector if it's a unary
  // interleave.
4556   // In both cases, HalfNumElts will be extracted.
4557   // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4558   // we'll create an illegal extract_subvector.
4559   // FIXME: We could support other values using a slidedown first.
4560   int HalfNumElts = NumElts / 2;
4561   return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4562 }
4563 
4564 /// Match shuffles that concatenate two vectors, rotate the concatenation,
4565 /// and then extract the original number of elements from the rotated result.
4566 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4567 /// returned rotation amount is for a rotate right, where elements move from
4568 /// higher elements to lower elements. \p LoSrc indicates the first source
4569 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4570 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4571 /// 0 or 1 if a rotation is found.
4572 ///
4573 /// NOTE: We talk about rotate to the right which matches how bit shift and
4574 /// rotate instructions are described where LSBs are on the right, but LLVM IR
4575 /// and the table below write vectors with the lowest elements on the left.
4576 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4577   int Size = Mask.size();
4578 
4579   // We need to detect various ways of spelling a rotation:
4580   //   [11, 12, 13, 14, 15,  0,  1,  2]
4581   //   [-1, 12, 13, 14, -1, -1,  1, -1]
4582   //   [-1, -1, -1, -1, -1, -1,  1,  2]
4583   //   [ 3,  4,  5,  6,  7,  8,  9, 10]
4584   //   [-1,  4,  5,  6, -1, -1,  9, -1]
4585   //   [-1,  4,  5,  6, -1, -1, -1, -1]
4586   int Rotation = 0;
4587   LoSrc = -1;
4588   HiSrc = -1;
4589   for (int i = 0; i != Size; ++i) {
4590     int M = Mask[i];
4591     if (M < 0)
4592       continue;
4593 
4594     // Determine where a rotate vector would have started.
4595     int StartIdx = i - (M % Size);
4596     // The identity rotation isn't interesting, stop.
4597     if (StartIdx == 0)
4598       return -1;
4599 
4600     // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, the rotation is how much of
    // the head we found.
4603     int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4604 
4605     if (Rotation == 0)
4606       Rotation = CandidateRotation;
4607     else if (Rotation != CandidateRotation)
4608       // The rotations don't match, so we can't match this mask.
4609       return -1;
4610 
4611     // Compute which value this mask is pointing at.
4612     int MaskSrc = M < Size ? 0 : 1;
4613 
4614     // Compute which of the two target values this index should be assigned to.
    // This reflects whether the high elements are remaining or the low
    // elements are remaining.
4617     int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4618 
4619     // Either set up this value if we've not encountered it before, or check
4620     // that it remains consistent.
4621     if (TargetSrc < 0)
4622       TargetSrc = MaskSrc;
4623     else if (TargetSrc != MaskSrc)
4624       // This may be a rotation, but it pulls from the inputs in some
4625       // unsupported interleaving.
4626       return -1;
4627   }
4628 
4629   // Check that we successfully analyzed the mask, and normalize the results.
4630   assert(Rotation != 0 && "Failed to locate a viable rotation!");
4631   assert((LoSrc >= 0 || HiSrc >= 0) &&
4632          "Failed to find a rotated input vector!");
4633 
4634   return Rotation;
4635 }
4636 
4637 // Lower a deinterleave shuffle to SRL and TRUNC.  Factor must be
// 2, 4, or 8, and the integer type Factor-times larger than VT's
4639 // element type must be a legal element type.
4640 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4641 //                          -> [p, q, r, s] (Factor=2, Index=1)
4642 static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4643                                             SDValue Src, unsigned Factor,
4644                                             unsigned Index, SelectionDAG &DAG) {
4645   unsigned EltBits = VT.getScalarSizeInBits();
4646   ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4647   MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4648                                    SrcEC.divideCoefficientBy(Factor));
4649   MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4650                                SrcEC.divideCoefficientBy(Factor));
4651   Src = DAG.getBitcast(WideSrcVT, Src);
4652 
4653   unsigned Shift = Index * EltBits;
4654   SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4655                             DAG.getConstant(Shift, DL, WideSrcVT));
4656   Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4657   MVT IntVT = VT.changeVectorElementTypeToInteger();
4658   Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4659                     DAG.getVectorIdxConstant(0, DL));
4660   return DAG.getBitcast(VT, Res);
4661 }
4662 
4663 // Lower the following shuffle to vslidedown.
4664 // a)
4665 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
4666 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
4667 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4668 // b)
4669 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
4670 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
4671 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
4672 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
4673 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
4674 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4675 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4676                                                SDValue V1, SDValue V2,
4677                                                ArrayRef<int> Mask,
4678                                                const RISCVSubtarget &Subtarget,
4679                                                SelectionDAG &DAG) {
4680   auto findNonEXTRACT_SUBVECTORParent =
4681       [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4682     uint64_t Offset = 0;
4683     while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4684            // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4685            // a scalable vector. But we don't want to match the case.
4686            Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4687       Offset += Parent.getConstantOperandVal(1);
4688       Parent = Parent.getOperand(0);
4689     }
4690     return std::make_pair(Parent, Offset);
4691   };
4692 
4693   auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4694   auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4695 
4696   // Extracting from the same source.
4697   SDValue Src = V1Src;
4698   if (Src != V2Src)
4699     return SDValue();
4700 
4701   // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4702   SmallVector<int, 16> NewMask(Mask);
4703   for (size_t i = 0; i != NewMask.size(); ++i) {
4704     if (NewMask[i] == -1)
4705       continue;
4706 
4707     if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4708       NewMask[i] = NewMask[i] + V1IndexOffset;
4709     } else {
      // Subtracting NewMask.size() is needed. Otherwise, the b) case would be
4711       // <5,6,7,12> instead of <5,6,7,8>.
4712       NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4713     }
4714   }
4715 
4716   // First index must be known and non-zero. It will be used as the slidedown
4717   // amount.
4718   if (NewMask[0] <= 0)
4719     return SDValue();
4720 
  // NewMask must also be contiguous.
4722   for (unsigned i = 1; i != NewMask.size(); ++i)
4723     if (NewMask[i - 1] + 1 != NewMask[i])
4724       return SDValue();
4725 
4726   MVT XLenVT = Subtarget.getXLenVT();
4727   MVT SrcVT = Src.getSimpleValueType();
4728   MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4729   auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4730   SDValue Slidedown =
4731       getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4732                     convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4733                     DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4734   return DAG.getNode(
4735       ISD::EXTRACT_SUBVECTOR, DL, VT,
4736       convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4737       DAG.getConstant(0, DL, XLenVT));
4738 }
4739 
4740 // Because vslideup leaves the destination elements at the start intact, we can
4741 // use it to perform shuffles that insert subvectors:
4742 //
4743 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4744 // ->
4745 // vsetvli zero, 8, e8, mf2, ta, ma
4746 // vslideup.vi v8, v9, 4
4747 //
4748 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4749 // ->
4750 // vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
4752 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4753                                              SDValue V1, SDValue V2,
4754                                              ArrayRef<int> Mask,
4755                                              const RISCVSubtarget &Subtarget,
4756                                              SelectionDAG &DAG) {
4757   unsigned NumElts = VT.getVectorNumElements();
4758   int NumSubElts, Index;
4759   if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4760                                                 Index))
4761     return SDValue();
4762 
4763   bool OpsSwapped = Mask[Index] < (int)NumElts;
4764   SDValue InPlace = OpsSwapped ? V2 : V1;
4765   SDValue ToInsert = OpsSwapped ? V1 : V2;
4766 
4767   MVT XLenVT = Subtarget.getXLenVT();
4768   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4769   auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4770   // We slide up by the index that the subvector is being inserted at, and set
4771   // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in-place vector, i.e. inserting right
4774   // up to the very end of it, then we don't actually care about the tail.
4775   if (NumSubElts + Index >= (int)NumElts)
4776     Policy |= RISCVII::TAIL_AGNOSTIC;
4777 
4778   InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4779   ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4780   SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4781 
4782   SDValue Res;
4783   // If we're inserting into the lowest elements, use a tail undisturbed
4784   // vmv.v.v.
4785   if (Index == 0)
4786     Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4787                       VL);
4788   else
4789     Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4790                       DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4791   return convertFromScalableVector(VT, Res, DAG, Subtarget);
4792 }
4793 
4794 /// Match v(f)slide1up/down idioms.  These operations involve sliding
4795 /// N-1 elements to make room for an inserted scalar at one end.
4796 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4797                                             SDValue V1, SDValue V2,
4798                                             ArrayRef<int> Mask,
4799                                             const RISCVSubtarget &Subtarget,
4800                                             SelectionDAG &DAG) {
4801   bool OpsSwapped = false;
4802   if (!isa<BuildVectorSDNode>(V1)) {
4803     if (!isa<BuildVectorSDNode>(V2))
4804       return SDValue();
4805     std::swap(V1, V2);
4806     OpsSwapped = true;
4807   }
4808   SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4809   if (!Splat)
4810     return SDValue();
4811 
4812   // Return true if the mask could describe a slide of Mask.size() - 1
4813   // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4814   auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4815     const unsigned S = (Offset > 0) ? 0 : -Offset;
4816     const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4817     for (unsigned i = S; i != E; ++i)
4818       if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4819         return false;
4820     return true;
4821   };
4822 
4823   const unsigned NumElts = VT.getVectorNumElements();
4824   bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4825   if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4826     return SDValue();
4827 
4828   const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // The inserted lane must come from the splat; an undef scalar would be
  // legal but isn't profitable.
4830   if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4831     return SDValue();
4832 
4833   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4834   auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4835 
4836   // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4837   // vslide1{down,up}.vx instead.
4838   if (VT.getVectorElementType() == MVT::bf16 ||
4839       (VT.getVectorElementType() == MVT::f16 &&
4840        !Subtarget.hasVInstructionsF16())) {
4841     MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4842     Splat =
4843         DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4844     V2 = DAG.getBitcast(
4845         IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4846     SDValue Vec = DAG.getNode(
4847         IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4848         IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4849     Vec = DAG.getBitcast(ContainerVT, Vec);
4850     return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4851   }
4852 
  auto OpCode =
      IsVSlidedown ? (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL
                                           : RISCVISD::VSLIDE1DOWN_VL)
                   : (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
                                           : RISCVISD::VSLIDE1UP_VL);
4856   if (!VT.isFloatingPoint())
4857     Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
  auto Vec =
      DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                  convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
                  Splat, TrueMask, VL);
4862   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4863 }
4864 
4865 // Match a mask which "spreads" the leading elements of a vector evenly
4866 // across the result.  Factor is the spread amount, and Index is the
4867 // offset applied.  (on success, Index < Factor)  This is the inverse
4868 // of a deinterleave with the same Factor and Index.  This is analogous
4869 // to an interleave, except that all but one lane is undef.
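// Illustrative examples with Factor = 2:
//   <0, -1, 1, -1, 2, -1, 3, -1>  ->  Index = 0
//   <-1, 0, -1, 1, -1, 2, -1, 3>  ->  Index = 1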
4870 static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4871   SmallVector<bool> LaneIsUndef(Factor, true);
4872   for (unsigned i = 0; i < Mask.size(); i++)
4873     LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4874 
4875   bool Found = false;
4876   for (unsigned i = 0; i < Factor; i++) {
4877     if (LaneIsUndef[i])
4878       continue;
4879     if (Found)
4880       return false;
4881     Index = i;
4882     Found = true;
4883   }
4884   if (!Found)
4885     return false;
4886 
4887   for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4888     unsigned j = i * Factor + Index;
4889     if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4890       return false;
4891   }
4892   return true;
4893 }
4894 
4895 // Given a vector a, b, c, d return a vector Factor times longer
4896 // with Factor-1 undef's between elements. Ex:
4897 //   a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4898 //   undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4899 static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4900                                  const SDLoc &DL, SelectionDAG &DAG) {
4901 
4902   MVT VT = V.getSimpleValueType();
4903   unsigned EltBits = VT.getScalarSizeInBits();
4904   ElementCount EC = VT.getVectorElementCount();
4905   V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4906 
4907   MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4908 
4909   SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4910   // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4911   // allow the SHL to fold away if Index is 0.
4912   if (Index != 0)
4913     Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4914                          DAG.getConstant(EltBits * Index, DL, WideVT));
4915   // Make sure to use original element type
4916   MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4917                                   EC.multiplyCoefficientBy(Factor));
4918   return DAG.getBitcast(ResultVT, Result);
4919 }
4920 
4921 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4922 // to create an interleaved vector of <[vscale x] n*2 x ty>.
4923 // This requires that the size of ty is less than the subtarget's maximum ELEN.
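// For example (illustrative): EvenV = <a, b> and OddV = <x, y> produce the
// interleaved result <a, x, b, y>.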
4924 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4925                                      const SDLoc &DL, SelectionDAG &DAG,
4926                                      const RISCVSubtarget &Subtarget) {
4927 
4928   // FIXME: Not only does this optimize the code, it fixes some correctness
4929   // issues because MIR does not have freeze.
4930   if (EvenV.isUndef())
4931     return getWideningSpread(OddV, 2, 1, DL, DAG);
4932   if (OddV.isUndef())
4933     return getWideningSpread(EvenV, 2, 0, DL, DAG);
4934 
4935   MVT VecVT = EvenV.getSimpleValueType();
4936   MVT VecContainerVT = VecVT; // <vscale x n x ty>
4937   // Convert fixed vectors to scalable if needed
4938   if (VecContainerVT.isFixedLengthVector()) {
4939     VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4940     EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4941     OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4942   }
4943 
4944   assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4945 
4946   // We're working with a vector of the same size as the resulting
4947   // interleaved vector, but with half the number of elements and
4948   // twice the SEW (Hence the restriction on not using the maximum
4949   // ELEN)
4950   MVT WideVT =
4951       MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4952                        VecVT.getVectorElementCount());
4953   MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4954   if (WideContainerVT.isFixedLengthVector())
4955     WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4956 
4957   // Bitcast the input vectors to integers in case they are FP
4958   VecContainerVT = VecContainerVT.changeTypeToInteger();
4959   EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4960   OddV = DAG.getBitcast(VecContainerVT, OddV);
4961 
4962   auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4963   SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4964 
4965   SDValue Interleaved;
4966   if (Subtarget.hasStdExtZvbb()) {
4967     // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4968     SDValue OffsetVec =
4969         DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4970     Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4971                               OffsetVec, Passthru, Mask, VL);
4972     Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4973                               Interleaved, EvenV, Passthru, Mask, VL);
4974   } else {
4975     // FIXME: We should freeze the odd vector here. We already handled the case
4976     // of provably undef/poison above.
4977 
4978     // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4979     // vwaddu.vv
4980     Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4981                               OddV, Passthru, Mask, VL);
4982 
4983     // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), the all-ones value
4984     SDValue AllOnesVec = DAG.getSplatVector(
4985         VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4986     SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4987                                   OddV, AllOnesVec, Passthru, Mask, VL);
4988 
4989     // Add the two together so we get
4990     //   (OddV * 0xff...ff) + (OddV + EvenV)
4991     // = (OddV * 0x100...00) + EvenV
4992     // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4993     // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4994     Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4995                               Interleaved, OddsMul, Passthru, Mask, VL);
4996   }
4997 
4998   // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4999   MVT ResultContainerVT = MVT::getVectorVT(
5000       VecVT.getVectorElementType(), // Make sure to use original type
5001       VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5002   Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5003 
5004   // Convert back to a fixed vector if needed
5005   MVT ResultVT =
5006       MVT::getVectorVT(VecVT.getVectorElementType(),
5007                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5008   if (ResultVT.isFixedLengthVector())
5009     Interleaved =
5010         convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5011 
5012   return Interleaved;
5013 }
5014 
5015 // If we have a vector of bits that we want to reverse, we can use a vbrev on a
5016 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5017 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5018                                       SelectionDAG &DAG,
5019                                       const RISCVSubtarget &Subtarget) {
5020   SDLoc DL(SVN);
5021   MVT VT = SVN->getSimpleValueType(0);
5022   SDValue V = SVN->getOperand(0);
5023   unsigned NumElts = VT.getVectorNumElements();
5024 
5025   assert(VT.getVectorElementType() == MVT::i1);
5026 
5027   if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5028                                         SVN->getMask().size()) ||
5029       !SVN->getOperand(1).isUndef())
5030     return SDValue();
5031 
5032   unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5033   EVT ViaVT = EVT::getVectorVT(
5034       *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5035   EVT ViaBitVT =
5036       EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5037 
5038   // If we don't have zvbb or the larger element type is wider than ELEN, the
5039   // operation will be illegal.
5040   if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5041                                                                ViaVT) ||
5042       !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5043     return SDValue();
5044 
5045   // If the bit vector doesn't fit exactly into the larger element type, we need
5046   // to insert it into the larger vector and then shift the reversed bits down
5047   // afterwards to get rid of the gap introduced.
5048   if (ViaEltSize > NumElts)
5049     V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5050                     V, DAG.getVectorIdxConstant(0, DL));
5051 
5052   SDValue Res =
5053       DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5054 
5055   // Shift the reversed bits back down if the vector didn't exactly fit into
5056   // the larger element type.
5057   if (ViaEltSize > NumElts)
5058     Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5059                       DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5060 
5061   Res = DAG.getBitcast(ViaBitVT, Res);
5062 
5063   if (ViaEltSize > NumElts)
5064     Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5065                       DAG.getVectorIdxConstant(0, DL));
5066   return Res;
5067 }
5068 
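// Check whether the shuffle mask can be reinterpreted as a bit-rotate of
// wider elements (per ShuffleVectorInst::isBitRotateMask) such that the
// widened element type is legal for the subtarget. On success, the widened
// vector type and rotate amount are returned via RotateVT and RotateAmt.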
5069 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5070                              SelectionDAG &DAG,
5071                              const RISCVSubtarget &Subtarget,
5072                              MVT &RotateVT, unsigned &RotateAmt) {
5073   SDLoc DL(SVN);
5074 
5075   EVT VT = SVN->getValueType(0);
5076   unsigned NumElts = VT.getVectorNumElements();
5077   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5078   unsigned NumSubElts;
5079   if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5080                                           NumElts, NumSubElts, RotateAmt))
5081     return false;
5082   RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5083                                   NumElts / NumSubElts);
5084 
5085   // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5086   return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5087 }
5088 
5089 // Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5090 // reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5091 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5092 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5093                                            SelectionDAG &DAG,
5094                                            const RISCVSubtarget &Subtarget) {
5095   SDLoc DL(SVN);
5096 
5097   EVT VT = SVN->getValueType(0);
5098   unsigned RotateAmt;
5099   MVT RotateVT;
5100   if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5101     return SDValue();
5102 
5103   SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5104 
5105   SDValue Rotate;
5106   // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5107   // so canonicalize to vrev8.
5108   if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5109     Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5110   else
5111     Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5112                          DAG.getConstant(RotateAmt, DL, RotateVT));
5113 
5114   return DAG.getBitcast(VT, Rotate);
5115 }
5116 
5117 // If compiling with an exactly known VLEN, see if we can split a
5118 // shuffle on m2 or larger into a small number of m1 sized shuffles
5119 // which write each destination register exactly once.
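// For example, with a known VLEN of 128, a v8i64 shuffle (LMUL=4) occupies
// four vector registers; each destination register can then be built from
// m1-sized (v2i64) shuffles of extracted source registers and inserted back
// into the result.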
5120 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5121                                             SelectionDAG &DAG,
5122                                             const RISCVSubtarget &Subtarget) {
5123   SDLoc DL(SVN);
5124   MVT VT = SVN->getSimpleValueType(0);
5125   SDValue V1 = SVN->getOperand(0);
5126   SDValue V2 = SVN->getOperand(1);
5127   ArrayRef<int> Mask = SVN->getMask();
5128 
5129   // If we don't know the exact data layout, there's not much we can do.  If
5130   // this is already m1 or smaller, there's no point in splitting further.
5131   const auto VLen = Subtarget.getRealVLen();
5132   if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5133     return SDValue();
5134 
5135   // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5136   // expansion for.
5137   unsigned RotateAmt;
5138   MVT RotateVT;
5139   if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5140     return SDValue();
5141 
5142   MVT ElemVT = VT.getVectorElementType();
5143   unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5144 
5145   EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5146   MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5147   MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5148   assert(M1VT == getLMUL1VT(M1VT));
5149   unsigned NumOpElts = M1VT.getVectorMinNumElements();
5150   unsigned NumElts = ContainerVT.getVectorMinNumElements();
5151   unsigned NumOfSrcRegs = NumElts / NumOpElts;
5152   unsigned NumOfDestRegs = NumElts / NumOpElts;
5153   // The following semantically builds up a fixed length concat_vector
5154   // of the component shuffle_vectors.  We eagerly lower to scalable here
5155   // to avoid DAG combining it back to a large shuffle_vector again.
5156   V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5157   V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5158   SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5159       Operands;
5160   processShuffleMasks(
5161       Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5162       [&]() { Operands.emplace_back(); },
5163       [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5164         Operands.emplace_back().emplace_back(
5165             SrcVecIdx, UINT_MAX,
5166             SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5167       },
5168       [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5169         if (NewReg)
5170           Operands.emplace_back();
5171         Operands.back().emplace_back(
5172             Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5173       });
5174   assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5175   // Note: check that we do not emit too many shuffles here to prevent code
5176   // size explosion.
5177   // TODO: investigate whether this can be improved by extra analysis of the
5178   // masks to check if the resulting code is more profitable.
5179   unsigned NumShuffles = std::accumulate(
5180       Operands.begin(), Operands.end(), 0u,
5181       [&](unsigned N,
5182           ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5183         if (Data.empty())
5184           return N;
5185         N += Data.size();
5186         for (const auto &P : Data) {
5187           unsigned Idx2 = std::get<1>(P);
5188           ArrayRef<int> Mask = std::get<2>(P);
5189           if (Idx2 != UINT_MAX)
5190             ++N;
5191           else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5192             --N;
5193         }
5194         return N;
5195       });
5196   if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5197       (NumOfDestRegs <= 2 && NumShuffles >= 4))
5198     return SDValue();
5199   auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5200     SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5201                                  DAG.getVectorIdxConstant(ExtractIdx, DL));
5202     SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5203     return SubVec;
5204   };
5205   auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5206                                         ArrayRef<int> Mask) {
5207     SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5208     return SubVec;
5209   };
5210   SDValue Vec = DAG.getUNDEF(ContainerVT);
5211   for (auto [I, Data] : enumerate(Operands)) {
5212     if (Data.empty())
5213       continue;
5214     SmallDenseMap<unsigned, SDValue, 4> Values;
5215     for (unsigned I : seq<unsigned>(Data.size())) {
5216       const auto &[Idx1, Idx2, _] = Data[I];
5217       if (Values.contains(Idx1)) {
5218         assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5219                "Expected both indices to be extracted already.");
5220         break;
5221       }
5222       SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5223                                (Idx1 % NumOfSrcRegs) * NumOpElts);
5224       Values[Idx1] = V;
5225       if (Idx2 != UINT_MAX)
5226         Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5227                                     (Idx2 % NumOfSrcRegs) * NumOpElts);
5228     }
5229     SDValue V;
5230     for (const auto &[Idx1, Idx2, Mask] : Data) {
5231       SDValue V1 = Values.at(Idx1);
5232       SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5233       V = PerformShuffle(V1, V2, Mask);
5234       Values[Idx1] = V;
5235     }
5236 
5237     unsigned InsertIdx = I * NumOpElts;
5238     V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5239     Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5240                       DAG.getVectorIdxConstant(InsertIdx, DL));
5241   }
5242   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5243 }
5244 
5245 // Matches a subset of compress masks with a contiguous prefix of output
5246 // elements.  This could be extended to allow gaps by deciding which
5247 // source elements to spuriously demand.
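// For example, <0, 2, 3, 5, -1, -1> qualifies (strictly increasing, each
// index at least as large as its destination position, undefs only in the
// tail), while <1, 0, 2, 3> does not.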
5248 static bool isCompressMask(ArrayRef<int> Mask) {
5249   int Last = -1;
5250   bool SawUndef = false;
5251   for (unsigned i = 0; i < Mask.size(); i++) {
5252     if (Mask[i] == -1) {
5253       SawUndef = true;
5254       continue;
5255     }
5256     if (SawUndef)
5257       return false;
5258     if (i > (unsigned)Mask[i])
5259       return false;
5260     if (Mask[i] <= Last)
5261       return false;
5262     Last = Mask[i];
5263   }
5264   return true;
5265 }
5266 
5267 /// Given a shuffle where the indices are disjoint between the two sources,
5268 /// e.g.:
5269 ///
5270 /// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5271 ///
5272 /// Merge the two sources into one and do a single source shuffle:
5273 ///
5274 /// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5275 /// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5276 ///
5277 /// A vselect will either be merged into a masked instruction or be lowered as a
5278 /// vmerge.vvm, which is cheaper than a vrgather.vv.
5279 static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5280                                            SelectionDAG &DAG,
5281                                            const RISCVSubtarget &Subtarget) {
5282   MVT VT = SVN->getSimpleValueType(0);
5283   MVT XLenVT = Subtarget.getXLenVT();
5284   SDLoc DL(SVN);
5285 
5286   const ArrayRef<int> Mask = SVN->getMask();
5287 
5288   // Work out which source each lane will come from.
5289   SmallVector<int, 16> Srcs(Mask.size(), -1);
5290 
5291   for (int Idx : Mask) {
5292     if (Idx == -1)
5293       continue;
5294     unsigned SrcIdx = Idx % Mask.size();
5295     int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5296     if (Srcs[SrcIdx] == -1)
5297       // Mark this source as using this lane.
5298       Srcs[SrcIdx] = Src;
5299     else if (Srcs[SrcIdx] != Src)
5300       // The other source is using this lane: not disjoint.
5301       return SDValue();
5302   }
5303 
5304   SmallVector<SDValue> SelectMaskVals;
5305   for (int Lane : Srcs) {
5306     if (Lane == -1)
5307       SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5308     else
5309       SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5310   }
5311   MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5312   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5313   SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5314                                SVN->getOperand(0), SVN->getOperand(1));
5315 
5316   // Move all indices relative to the first source.
5317   SmallVector<int> NewMask(Mask.size());
5318   for (unsigned I = 0; I < Mask.size(); I++) {
5319     if (Mask[I] == -1)
5320       NewMask[I] = -1;
5321     else
5322       NewMask[I] = Mask[I] % Mask.size();
5323   }
5324 
5325   return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5326 }
5327 
5328 /// Try to widen element type to get a new mask value for a better permutation
5329 /// sequence.  This doesn't try to inspect the widened mask for profitability;
5330 /// we speculate the widened form is equal or better.  This has the effect of
5331 /// reducing mask constant sizes - allowing cheaper materialization sequences
5332 /// - and index sequence sizes - reducing register pressure and materialization
5333 /// cost, at the cost of (possibly) an extra VTYPE toggle.
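/// For example, a v8i8 shuffle with mask <0, 1, 4, 5, 2, 3, 6, 7> can be
/// widened to a v4i16 shuffle with mask <0, 2, 1, 3>.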
5334 static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5335   SDLoc DL(Op);
5336   MVT VT = Op.getSimpleValueType();
5337   MVT ScalarVT = VT.getVectorElementType();
5338   unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5339   SDValue V0 = Op.getOperand(0);
5340   SDValue V1 = Op.getOperand(1);
5341   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5342 
5343   // Avoid wasted work leading to isTypeLegal check failing below
5344   if (ElementSize > 32)
5345     return SDValue();
5346 
5347   SmallVector<int, 8> NewMask;
5348   if (!widenShuffleMaskElts(Mask, NewMask))
5349     return SDValue();
5350 
5351   MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5352                                       : MVT::getIntegerVT(ElementSize * 2);
5353   MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5354   if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5355     return SDValue();
5356   V0 = DAG.getBitcast(NewVT, V0);
5357   V1 = DAG.getBitcast(NewVT, V1);
5358   return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5359 }
5360 
5361 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5362                                    const RISCVSubtarget &Subtarget) {
5363   SDValue V1 = Op.getOperand(0);
5364   SDValue V2 = Op.getOperand(1);
5365   SDLoc DL(Op);
5366   MVT XLenVT = Subtarget.getXLenVT();
5367   MVT VT = Op.getSimpleValueType();
5368   unsigned NumElts = VT.getVectorNumElements();
5369   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5370 
5371   if (VT.getVectorElementType() == MVT::i1) {
5372     // Lower to a vror.vi of a larger element type if possible before we promote
5373     // i1s to i8s.
5374     if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5375       return V;
5376     if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5377       return V;
5378 
5379     // Promote i1 shuffle to i8 shuffle.
5380     MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5381     V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5382     V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5383                       : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5384     SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5385     return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5386                         ISD::SETNE);
5387   }
5388 
5389   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5390 
5391   auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5392 
5393   if (SVN->isSplat()) {
5394     const int Lane = SVN->getSplatIndex();
5395     if (Lane >= 0) {
5396       MVT SVT = VT.getVectorElementType();
5397 
5398       // Turn splatted vector load into a strided load with an X0 stride.
5399       SDValue V = V1;
5400       // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5401       // with undef.
5402       // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5403       int Offset = Lane;
5404       if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5405         int OpElements =
5406             V.getOperand(0).getSimpleValueType().getVectorNumElements();
5407         V = V.getOperand(Offset / OpElements);
5408         Offset %= OpElements;
5409       }
5410 
5411       // We need to ensure the load isn't atomic or volatile.
5412       if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5413         auto *Ld = cast<LoadSDNode>(V);
5414         Offset *= SVT.getStoreSize();
5415         SDValue NewAddr = DAG.getMemBasePlusOffset(
5416             Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5417 
5418         // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5419         if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5420           SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5421           SDValue IntID =
5422               DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5423           SDValue Ops[] = {Ld->getChain(),
5424                            IntID,
5425                            DAG.getUNDEF(ContainerVT),
5426                            NewAddr,
5427                            DAG.getRegister(RISCV::X0, XLenVT),
5428                            VL};
5429           SDValue NewLoad = DAG.getMemIntrinsicNode(
5430               ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5431               DAG.getMachineFunction().getMachineMemOperand(
5432                   Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5433           DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5434           return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5435         }
5436 
5437         MVT SplatVT = ContainerVT;
5438 
5439         // f16 with zvfhmin and bf16 need to use an integer scalar load.
5440         if (SVT == MVT::bf16 ||
5441             (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5442           SVT = MVT::i16;
5443           SplatVT = ContainerVT.changeVectorElementType(SVT);
5444         }
5445 
5446         // Otherwise use a scalar load and splat. This will give the best
5447         // opportunity to fold a splat into the operation. ISel can turn it into
5448         // the x0 strided load if we aren't able to fold away the select.
5449         if (SVT.isFloatingPoint())
5450           V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5451                           Ld->getPointerInfo().getWithOffset(Offset),
5452                           Ld->getOriginalAlign(),
5453                           Ld->getMemOperand()->getFlags());
5454         else
5455           V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5456                              Ld->getPointerInfo().getWithOffset(Offset), SVT,
5457                              Ld->getOriginalAlign(),
5458                              Ld->getMemOperand()->getFlags());
5459         DAG.makeEquivalentMemoryOrdering(Ld, V);
5460 
5461         unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5462                                                  : RISCVISD::VMV_V_X_VL;
5463         SDValue Splat =
5464             DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5465         Splat = DAG.getBitcast(ContainerVT, Splat);
5466         return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5467       }
5468 
5469       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5470       assert(Lane < (int)NumElts && "Unexpected lane!");
5471       SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5472                                    V1, DAG.getConstant(Lane, DL, XLenVT),
5473                                    DAG.getUNDEF(ContainerVT), TrueMask, VL);
5474       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5475     }
5476   }
5477 
5478   // For exact VLEN m2 or greater, try to split to m1 operations if we
5479   // can split cleanly.
5480   if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5481     return V;
5482 
5483   ArrayRef<int> Mask = SVN->getMask();
5484 
5485   if (SDValue V =
5486           lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5487     return V;
5488 
5489   if (SDValue V =
5490           lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5491     return V;
5492 
5493   // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5494   // available.
5495   if (Subtarget.hasStdExtZvkb())
5496     if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5497       return V;
5498 
5499   // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5500   // be undef, which can be handled with a single SLIDEDOWN/UP.
5501   int LoSrc, HiSrc;
5502   int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5503   if (Rotation > 0) {
5504     SDValue LoV, HiV;
5505     if (LoSrc >= 0) {
5506       LoV = LoSrc == 0 ? V1 : V2;
5507       LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5508     }
5509     if (HiSrc >= 0) {
5510       HiV = HiSrc == 0 ? V1 : V2;
5511       HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5512     }
5513 
5514     // We found a rotation. We need to slide HiV down by Rotation. Then we need
5515     // to slide LoV up by (NumElts - Rotation).
5516     unsigned InvRotate = NumElts - Rotation;
5517 
5518     SDValue Res = DAG.getUNDEF(ContainerVT);
5519     if (HiV) {
5520       // Even though we could use a smaller VL, don't to avoid a vsetivli
5521       // toggle.
5522       Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5523                           DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5524     }
5525     if (LoV)
5526       Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5527                         DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5528                         RISCVII::TAIL_AGNOSTIC);
5529 
5530     return convertFromScalableVector(VT, Res, DAG, Subtarget);
5531   }
5532 
5533   if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5534     return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5535 
5536   // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5537   // use shift and truncate to perform the shuffle.
5538   // TODO: For Factor=6, we can perform the first step of the deinterleave via
5539   // shift-and-trunc reducing total cost for everything except an mf8 result.
5540   // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5541   // to do the entire operation.
5542   if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5543     const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5544     assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5545     for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5546       unsigned Index = 0;
5547       if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5548           1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5549         if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5550           return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5551       }
5552     }
5553   }
5554 
5555   if (SDValue V =
5556           lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5557     return V;
5558 
5559   // Detect an interleave shuffle and lower to
5560   // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5561   int EvenSrc, OddSrc;
5562   if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5563     // Extract the halves of the vectors.
5564     MVT HalfVT = VT.getHalfNumVectorElementsVT();
5565 
5566     // Recognize if one half is actually undef; the matching above will
5567     // otherwise reuse the even stream for the undef one.  This improves
5568     // spread(2) shuffles.
5569     bool LaneIsUndef[2] = { true, true};
5570     for (unsigned i = 0; i < Mask.size(); i++)
5571       LaneIsUndef[i % 2] &= (Mask[i] == -1);
5572 
5573     int Size = Mask.size();
5574     SDValue EvenV, OddV;
5575     if (LaneIsUndef[0]) {
5576       EvenV = DAG.getUNDEF(HalfVT);
5577     } else {
5578       assert(EvenSrc >= 0 && "Undef source?");
5579       EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5580       EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5581                           DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5582     }
5583 
5584     if (LaneIsUndef[1]) {
5585       OddV = DAG.getUNDEF(HalfVT);
5586     } else {
5587       assert(OddSrc >= 0 && "Undef source?");
5588       OddV = (OddSrc / Size) == 0 ? V1 : V2;
5589       OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5590                          DAG.getVectorIdxConstant(OddSrc % Size, DL));
5591     }
5592 
5593     return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5594   }
5595 
5596 
5597   // Handle any remaining single source shuffles
5598   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5599   if (V2.isUndef()) {
5600     // We might be able to express the shuffle as a bitrotate. But even if we
5601     // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5602     // shifts and a vor will have a higher throughput than a vrgather.
5603     if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5604       return V;
5605 
5606     // Before hitting generic lowering fallbacks, try to widen the mask
5607     // to a wider SEW.
5608     if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5609       return V;
5610 
5611     // Can we generate a vcompress instead of a vrgather?  These scale better
5612     // at high LMUL, at the cost of not being able to fold a following select
5613     // into them.  The mask constants are also smaller than the index vector
5614     // constants, and thus easier to materialize.
5615     if (isCompressMask(Mask)) {
5616       SmallVector<SDValue> MaskVals(NumElts,
5617                                     DAG.getConstant(false, DL, XLenVT));
5618       for (auto Idx : Mask) {
5619         if (Idx == -1)
5620           break;
5621         assert(Idx >= 0 && (unsigned)Idx < NumElts);
5622         MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5623       }
5624       MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5625       SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5626       return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5627                          DAG.getUNDEF(VT));
5628     }
5629 
5630     // Match a spread(4,8) which can be done via extend and shift.  Spread(2)
5631     // is fully covered in interleave(2) above, so it is ignored here.
5632     if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5633       unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5634       assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5635       for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5636         unsigned Index;
5637         if (isSpreadMask(Mask, Factor, Index)) {
5638           MVT NarrowVT =
5639               MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5640           SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5641                                     DAG.getVectorIdxConstant(0, DL));
5642           return getWideningSpread(Src, Factor, Index, DL, DAG);
5643         }
5644       }
5645     }
5646 
5647     if (VT.getScalarSizeInBits() == 8 &&
5648         any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5649       // On such a vector we're unable to use i8 as the index type.
5650       // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5651       // may involve vector splitting if we're already at LMUL=8, or our
5652       // user-supplied maximum fixed-length LMUL.
5653       return SDValue();
5654     }
5655 
5656     // Base case for the two operand recursion below - handle the worst case
5657     // single source shuffle.
5658     unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5659     MVT IndexVT = VT.changeTypeToInteger();
5660     // Since we can't introduce illegal index types at this stage, use i16 and
5661     // vrgatherei16 if the corresponding index type for plain vrgather is greater
5662     // than XLenVT.
5663     if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5664       GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5665       IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5666     }
5667 
5668     // If the mask allows, we can do all the index computation in 16 bits.  This
5669     // requires less work and less register pressure at high LMUL, and creates
5670     // smaller constants which may be cheaper to materialize.
5671     if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5672         (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5673       GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5674       IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5675     }
5676 
5677     MVT IndexContainerVT =
5678       ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5679 
5680     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5681     SmallVector<SDValue> GatherIndicesLHS;
5682     for (int MaskIndex : Mask) {
5683       bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5684       GatherIndicesLHS.push_back(IsLHSIndex
5685                                  ? DAG.getConstant(MaskIndex, DL, XLenVT)
5686                                  : DAG.getUNDEF(XLenVT));
5687     }
5688     SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5689     LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5690                                          Subtarget);
5691     SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5692                                  DAG.getUNDEF(ContainerVT), TrueMask, VL);
5693     return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5694   }
5695 
5696   // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5697   // merged with a second vrgather.
5698   SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5699 
5700   // Now construct the mask that will be used by the blended vrgather operation.
5701   // Construct the appropriate indices into each vector.
5702   for (int MaskIndex : Mask) {
5703     bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5704     ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5705                              ? MaskIndex : -1);
5706     ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5707   }
5708 
5709   // If the mask indices are disjoint between the two sources, we can lower it
5710   // as a vselect + a single source vrgather.vv. Don't do this if we think the
5711   // operands may end up being lowered to something cheaper than a vrgather.vv.
5712   if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5713       !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5714       !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5715       !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5716       !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5717     if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5718       return V;
5719 
5720   // Before hitting generic lowering fallbacks, try to widen the mask
5721   // to a wider SEW.
5722   if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5723     return V;
5724 
5725   // Try to pick a profitable operand order.
5726   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5727   SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5728 
5729   // Recursively invoke lowering for each operand if we had two
5730   // independent single source shuffles, and then combine the result via a
5731   // vselect.  Note that the vselect will likely be folded back into the
5732   // second permute (vrgather, or other) by the post-isel combine.
5733   V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5734   V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5735 
5736   SmallVector<SDValue> MaskVals;
5737   for (int MaskIndex : Mask) {
5738     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5739     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5740   }
5741 
5742   assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5743   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5744   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5745 
5746   if (SwapOps)
5747     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5748   return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5749 }
5750 
5751 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5752   // Only support legal VTs for other shuffles for now.
5753   if (!isTypeLegal(VT))
5754     return false;
5755 
5756   // Support splats for any type. These should type legalize well.
5757   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5758     return true;
5759 
5760   MVT SVT = VT.getSimpleVT();
5761 
5762   // Not for i1 vectors.
5763   if (SVT.getScalarType() == MVT::i1)
5764     return false;
5765 
5766   int Dummy1, Dummy2;
5767   return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5768          isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5769 }
5770 
5771 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5772 // the exponent.
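// For example, for a 32-bit element x = 8, CTTZ_ZERO_UNDEF isolates the
// lowest set bit with x & -x = 8, converts it to f32 (2^3, biased exponent
// 130), shifts the exponent down to the LSB, and subtracts the bias 127 to
// get 3.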
5773 SDValue
5774 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5775                                                SelectionDAG &DAG) const {
5776   MVT VT = Op.getSimpleValueType();
5777   unsigned EltSize = VT.getScalarSizeInBits();
5778   SDValue Src = Op.getOperand(0);
5779   SDLoc DL(Op);
5780   MVT ContainerVT = VT;
5781 
5782   SDValue Mask, VL;
5783   if (Op->isVPOpcode()) {
5784     Mask = Op.getOperand(1);
5785     if (VT.isFixedLengthVector())
5786       Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5787                                      Subtarget);
5788     VL = Op.getOperand(2);
5789   }
5790 
5791   // We choose an FP type that can represent the value if possible. Otherwise,
5792   // we use a round-to-zero conversion so the exponent of the result is correct.
5793   // TODO: Use f16 for i8 when possible?
5794   MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5795   if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5796     FloatEltVT = MVT::f32;
5797   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5798 
5799   // Legal types should have been checked in the RISCVTargetLowering
5800   // constructor.
5801   // TODO: Splitting may make sense in some cases.
5802   assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5803          "Expected legal float type!");
5804 
5805   // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5806   // The trailing zero count is equal to log2 of this single bit value.
5807   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5808     SDValue Neg = DAG.getNegative(Src, DL, VT);
5809     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5810   } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5811     SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5812                               Src, Mask, VL);
5813     Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5814   }
5815 
5816   // We have a legal FP type, convert to it.
5817   SDValue FloatVal;
5818   if (FloatVT.bitsGT(VT)) {
5819     if (Op->isVPOpcode())
5820       FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5821     else
5822       FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5823   } else {
5824     // Use RTZ to avoid rounding influencing exponent of FloatVal.
5825     if (VT.isFixedLengthVector()) {
5826       ContainerVT = getContainerForFixedLengthVector(VT);
5827       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5828     }
5829     if (!Op->isVPOpcode())
5830       std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5831     SDValue RTZRM =
5832         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5833     MVT ContainerFloatVT =
5834         MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5835     FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5836                            Src, Mask, RTZRM, VL);
5837     if (VT.isFixedLengthVector())
5838       FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5839   }
5840   // Bitcast to integer and shift the exponent to the LSB.
5841   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5842   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5843   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5844 
5845   SDValue Exp;
5846   // Restore back to original type. Truncation after SRL is to generate vnsrl.
5847   if (Op->isVPOpcode()) {
5848     Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5849                       DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5850     Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5851   } else {
5852     Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5853                       DAG.getConstant(ShiftAmt, DL, IntVT));
5854     if (IntVT.bitsLT(VT))
5855       Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5856     else if (IntVT.bitsGT(VT))
5857       Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5858   }
5859 
5860   // The exponent contains log2 of the value in biased form.
5861   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5862   // For trailing zeros, we just need to subtract the bias.
5863   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5864     return DAG.getNode(ISD::SUB, DL, VT, Exp,
5865                        DAG.getConstant(ExponentBias, DL, VT));
5866   if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5867     return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5868                        DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5869 
5870   // For leading zeros, we need to remove the bias and convert from log2 to
5871   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5872   unsigned Adjust = ExponentBias + (EltSize - 1);
5873   SDValue Res;
5874   if (Op->isVPOpcode())
5875     Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5876                       Mask, VL);
5877   else
5878     Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5879 
5880   // With a zero input, the result above equals Adjust, which is greater than
5881   // EltSize. Hence, we can clamp with min(Res, EltSize) for CTLZ.
5882   if (Op.getOpcode() == ISD::CTLZ)
5883     Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5884   else if (Op.getOpcode() == ISD::VP_CTLZ)
5885     Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5886                       DAG.getConstant(EltSize, DL, VT), Mask, VL);
5887   return Res;
5888 }
5889 
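// Lower VP_CTTZ_ELTS[_ZERO_UNDEF] by reducing the source to an i1 vector
// (comparing against zero when needed) and using vfirst to find the first set
// element. For the non-ZERO_UNDEF form, a result of -1 (no element set) is
// mapped to EVL.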
5890 SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5891                                                  SelectionDAG &DAG) const {
5892   SDLoc DL(Op);
5893   MVT XLenVT = Subtarget.getXLenVT();
5894   SDValue Source = Op->getOperand(0);
5895   MVT SrcVT = Source.getSimpleValueType();
5896   SDValue Mask = Op->getOperand(1);
5897   SDValue EVL = Op->getOperand(2);
5898 
5899   if (SrcVT.isFixedLengthVector()) {
5900     MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5901     Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5902     Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5903                                    Subtarget);
5904     SrcVT = ContainerVT;
5905   }
5906 
5907   // Convert to boolean vector.
5908   if (SrcVT.getScalarType() != MVT::i1) {
5909     SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5910     SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5911     Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5912                          {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5913                           DAG.getUNDEF(SrcVT), Mask, EVL});
5914   }
5915 
5916   SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5917   if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5918     // In this case, we can interpret poison as -1, so nothing to do further.
5919     return Res;
5920 
5921   // Convert -1 to VL.
5922   SDValue SetCC =
5923       DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5924   Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5925   return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5926 }
5927 
5928 // While RVV has alignment restrictions, we should always be able to load as a
5929 // legal equivalently-sized byte-typed vector instead. This method is
5930 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5931 // the load is already correctly-aligned, it returns SDValue().
5932 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5933                                                     SelectionDAG &DAG) const {
5934   auto *Load = cast<LoadSDNode>(Op);
5935   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5936 
5937   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5938                                      Load->getMemoryVT(),
5939                                      *Load->getMemOperand()))
5940     return SDValue();
5941 
5942   SDLoc DL(Op);
5943   MVT VT = Op.getSimpleValueType();
5944   unsigned EltSizeBits = VT.getScalarSizeInBits();
5945   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5946          "Unexpected unaligned RVV load type");
5947   MVT NewVT =
5948       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5949   assert(NewVT.isValid() &&
5950          "Expecting equally-sized RVV vector types to be legal");
5951   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5952                           Load->getPointerInfo(), Load->getOriginalAlign(),
5953                           Load->getMemOperand()->getFlags());
5954   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5955 }
5956 
5957 // While RVV has alignment restrictions, we should always be able to store as a
5958 // legal equivalently-sized byte-typed vector instead. This method is
5959 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5960 // returns SDValue() if the store is already correctly aligned.
5961 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5962                                                      SelectionDAG &DAG) const {
5963   auto *Store = cast<StoreSDNode>(Op);
5964   assert(Store && Store->getValue().getValueType().isVector() &&
5965          "Expected vector store");
5966 
5967   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5968                                      Store->getMemoryVT(),
5969                                      *Store->getMemOperand()))
5970     return SDValue();
5971 
5972   SDLoc DL(Op);
5973   SDValue StoredVal = Store->getValue();
5974   MVT VT = StoredVal.getSimpleValueType();
5975   unsigned EltSizeBits = VT.getScalarSizeInBits();
5976   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5977          "Unexpected unaligned RVV store type");
5978   MVT NewVT =
5979       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5980   assert(NewVT.isValid() &&
5981          "Expecting equally-sized RVV vector types to be legal");
5982   StoredVal = DAG.getBitcast(NewVT, StoredVal);
5983   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5984                       Store->getPointerInfo(), Store->getOriginalAlign(),
5985                       Store->getMemOperand()->getFlags());
5986 }
5987 
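// Custom lowering for i64 constants. Decide whether a constant outside the
// simm32 range should stay as an inline materialization sequence (possibly
// using the two-register (ADD (SLLI X, C), X) form) or be left to generic
// lowering, which will use a constant pool.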
5988 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5989                              const RISCVSubtarget &Subtarget) {
5990   assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5991 
5992   int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5993 
5994   // All simm32 constants should be handled by isel.
5995   // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
5996   // this check redundant, but small immediates are common, so checking for them
5997   // here first gives better compile time.
5998   if (isInt<32>(Imm))
5999     return Op;
6000 
6001   // We only need to cost the immediate if constant pool lowering is enabled.
6002   if (!Subtarget.useConstantPoolForLargeInts())
6003     return Op;
6004 
6005   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6006   if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6007     return Op;
6008 
6009   // Optimizations below are disabled for opt size. If we're optimizing for
6010   // size, use a constant pool.
6011   if (DAG.shouldOptForSize())
6012     return SDValue();
6013 
6014   // Special case. See if we can build the constant as (ADD (SLLI X, C), X); do
6015   // so if it will avoid a constant pool, even though it will require an extra
6016   // temporary register.
6017   // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6018   // low and high 32 bits are the same and bit 31 and 63 are set.
6019   unsigned ShiftAmt, AddOpc;
6020   RISCVMatInt::InstSeq SeqLo =
6021       RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6022   if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6023     return Op;
6024 
6025   return SDValue();
6026 }
6027 
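// Try to materialize a floating-point constant with a Zfa FLI instruction.
// Negative immediates whose magnitude is a valid FLI operand are emitted as
// FLI followed by FNEG; otherwise fall back to generic lowering.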
6028 SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6029                                              SelectionDAG &DAG) const {
6030   MVT VT = Op.getSimpleValueType();
6031   const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6032 
6033   // Can this constant be selected by a Zfa FLI instruction?
6034   bool Negate = false;
6035   int Index = getLegalZfaFPImm(Imm, VT);
6036 
6037   // If the constant is negative, try negating.
6038   if (Index < 0 && Imm.isNegative()) {
6039     Index = getLegalZfaFPImm(-Imm, VT);
6040     Negate = true;
6041   }
6042 
6043   // If we couldn't find a FLI lowering, fall back to generic code.
6044   if (Index < 0)
6045     return SDValue();
6046 
6047   // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6048   SDLoc DL(Op);
6049   SDValue Const =
6050       DAG.getNode(RISCVISD::FLI, DL, VT,
6051                   DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6052   if (!Negate)
6053     return Const;
6054 
6055   return DAG.getNode(ISD::FNEG, DL, VT, Const);
6056 }
6057 
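// Lower ATOMIC_FENCE. Fences that do not require an actual fence instruction
// (everything except seq_cst system fences under Ztso, plus singlethread
// fences in general) are emitted as MEMBARRIER, a compiler-only barrier.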
6058 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6059                                  const RISCVSubtarget &Subtarget) {
6060   SDLoc dl(Op);
6061   AtomicOrdering FenceOrdering =
6062       static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6063   SyncScope::ID FenceSSID =
6064       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6065 
6066   if (Subtarget.hasStdExtZtso()) {
6067     // The only fence that needs an instruction is a sequentially-consistent
6068     // cross-thread fence.
6069     if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6070         FenceSSID == SyncScope::System)
6071       return Op;
6072 
6073     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6074     return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6075   }
6076 
6077   // singlethread fences only synchronize with signal handlers on the same
6078   // thread and thus only need to preserve instruction order, not actually
6079   // enforce memory ordering.
6080   if (FenceSSID == SyncScope::SingleThread)
6081     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6082     return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6083 
6084   return Op;
6085 }
6086 
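// Lower IS_FPCLASS (and VP_IS_FPCLASS) by mapping the requested fcXXX classes
// onto the fclass/vfclass test-data-class bitmask and checking whether the
// classification result has any of those bits set.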
6087 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6088                                              SelectionDAG &DAG) const {
6089   SDLoc DL(Op);
6090   MVT VT = Op.getSimpleValueType();
6091   MVT XLenVT = Subtarget.getXLenVT();
6092   unsigned Check = Op.getConstantOperandVal(1);
6093   unsigned TDCMask = 0;
6094   if (Check & fcSNan)
6095     TDCMask |= RISCV::FPMASK_Signaling_NaN;
6096   if (Check & fcQNan)
6097     TDCMask |= RISCV::FPMASK_Quiet_NaN;
6098   if (Check & fcPosInf)
6099     TDCMask |= RISCV::FPMASK_Positive_Infinity;
6100   if (Check & fcNegInf)
6101     TDCMask |= RISCV::FPMASK_Negative_Infinity;
6102   if (Check & fcPosNormal)
6103     TDCMask |= RISCV::FPMASK_Positive_Normal;
6104   if (Check & fcNegNormal)
6105     TDCMask |= RISCV::FPMASK_Negative_Normal;
6106   if (Check & fcPosSubnormal)
6107     TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6108   if (Check & fcNegSubnormal)
6109     TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6110   if (Check & fcPosZero)
6111     TDCMask |= RISCV::FPMASK_Positive_Zero;
6112   if (Check & fcNegZero)
6113     TDCMask |= RISCV::FPMASK_Negative_Zero;
6114 
6115   bool IsOneBitMask = isPowerOf2_32(TDCMask);
6116 
6117   SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6118 
6119   if (VT.isVector()) {
6120     SDValue Op0 = Op.getOperand(0);
6121     MVT VT0 = Op.getOperand(0).getSimpleValueType();
6122 
6123     if (VT.isScalableVector()) {
6124       MVT DstVT = VT0.changeVectorElementTypeToInteger();
6125       auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6126       if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6127         Mask = Op.getOperand(2);
6128         VL = Op.getOperand(3);
6129       }
6130       SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6131                                     VL, Op->getFlags());
6132       if (IsOneBitMask)
6133         return DAG.getSetCC(DL, VT, FPCLASS,
6134                             DAG.getConstant(TDCMask, DL, DstVT),
6135                             ISD::CondCode::SETEQ);
6136       SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6137                                 DAG.getConstant(TDCMask, DL, DstVT));
6138       return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6139                           ISD::SETNE);
6140     }
6141 
6142     MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6143     MVT ContainerVT = getContainerForFixedLengthVector(VT);
6144     MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6145     auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6146     if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6147       Mask = Op.getOperand(2);
6148       MVT MaskContainerVT =
6149           getContainerForFixedLengthVector(Mask.getSimpleValueType());
6150       Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6151       VL = Op.getOperand(3);
6152     }
6153     Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6154 
6155     SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6156                                   Mask, VL, Op->getFlags());
6157 
6158     TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6159                            DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6160     if (IsOneBitMask) {
6161       SDValue VMSEQ =
6162           DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6163                       {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6164                        DAG.getUNDEF(ContainerVT), Mask, VL});
6165       return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6166     }
6167     SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6168                               TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6169 
6170     SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6171     SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6172                             DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6173 
6174     SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6175                                 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6176                                  DAG.getUNDEF(ContainerVT), Mask, VL});
6177     return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6178   }
6179 
6180   SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6181   SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6182   SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6183                              ISD::CondCode::SETNE);
6184   return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6185 }
6186 
6187 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6188 // operations propagate NaNs.
6189 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6190                                       const RISCVSubtarget &Subtarget) {
6191   SDLoc DL(Op);
6192   MVT VT = Op.getSimpleValueType();
6193 
6194   SDValue X = Op.getOperand(0);
6195   SDValue Y = Op.getOperand(1);
6196 
6197   if (!VT.isVector()) {
6198     MVT XLenVT = Subtarget.getXLenVT();
6199 
6200     // If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
6201     // ensures that when one input is a NaN, the other will also be a NaN,
6202     // allowing the NaN to propagate. If both inputs are NaN, this simply swaps
6203     // the inputs, which is harmless.
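    // For example, with X = NaN and Y = 1.0: XIsNonNan is false, so NewY
    // becomes X (NaN); YIsNonNan is true, so NewX stays X (NaN); fmax/fmin of
    // two NaNs then returns a NaN as required.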
6204 
6205     SDValue NewY = Y;
6206     if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6207       SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6208       NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6209     }
6210 
6211     SDValue NewX = X;
6212     if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6213       SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6214       NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6215     }
6216 
6217     unsigned Opc =
6218         Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6219     return DAG.getNode(Opc, DL, VT, NewX, NewY);
6220   }
6221 
6222   // Check for NaNs before converting fixed-length vectors to scalable ones.
6223   bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6224   bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6225 
6226   MVT ContainerVT = VT;
6227   if (VT.isFixedLengthVector()) {
6228     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6229     X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6230     Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6231   }
6232 
6233   SDValue Mask, VL;
6234   if (Op->isVPOpcode()) {
6235     Mask = Op.getOperand(2);
6236     if (VT.isFixedLengthVector())
6237       Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6238                                      Subtarget);
6239     VL = Op.getOperand(3);
6240   } else {
6241     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6242   }
6243 
6244   SDValue NewY = Y;
6245   if (!XIsNeverNan) {
6246     SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6247                                     {X, X, DAG.getCondCode(ISD::SETOEQ),
6248                                      DAG.getUNDEF(ContainerVT), Mask, VL});
6249     NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6250                        DAG.getUNDEF(ContainerVT), VL);
6251   }
6252 
6253   SDValue NewX = X;
6254   if (!YIsNeverNan) {
6255     SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6256                                     {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6257                                      DAG.getUNDEF(ContainerVT), Mask, VL});
6258     NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6259                        DAG.getUNDEF(ContainerVT), VL);
6260   }
6261 
6262   unsigned Opc =
6263       Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6264           ? RISCVISD::VFMAX_VL
6265           : RISCVISD::VFMIN_VL;
6266   SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6267                             DAG.getUNDEF(ContainerVT), Mask, VL);
6268   if (VT.isFixedLengthVector())
6269     Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6270   return Res;
6271 }
6272 
6273 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6274                                const RISCVSubtarget &Subtarget) {
6275   bool IsFABS = Op.getOpcode() == ISD::FABS;
6276   assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6277          "Wrong opcode for lowering FABS or FNEG.");
6278 
6279   MVT XLenVT = Subtarget.getXLenVT();
6280   MVT VT = Op.getSimpleValueType();
6281   assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6282 
6283   SDLoc DL(Op);
6284   SDValue Fmv =
6285       DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6286 
6287   APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6288   Mask = Mask.sext(Subtarget.getXLen());
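  // For FABS this mask is 0x7fff (clear the sign bit); for FNEG it is 0x8000
  // sign-extended to XLEN (flip the sign bit).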
6289 
6290   unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6291   SDValue Logic =
6292       DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6293   return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6294 }
6295 
6296 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6297                               const RISCVSubtarget &Subtarget) {
6298   assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6299 
6300   MVT XLenVT = Subtarget.getXLenVT();
6301   MVT VT = Op.getSimpleValueType();
6302   assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6303 
6304   SDValue Mag = Op.getOperand(0);
6305   SDValue Sign = Op.getOperand(1);
6306 
6307   SDLoc DL(Op);
6308 
6309   // Get sign bit into an integer value.
6310   SDValue SignAsInt;
6311   unsigned SignSize = Sign.getValueSizeInBits();
6312   if (SignSize == Subtarget.getXLen()) {
6313     SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6314   } else if (SignSize == 16) {
6315     SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6316   } else if (SignSize == 32) {
6317     SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6318   } else if (SignSize == 64) {
6319     assert(XLenVT == MVT::i32 && "Unexpected type");
6320     // Copy the upper word to integer.
6321     SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6322                     .getValue(1);
6323     SignSize = 32;
6324   } else
6325     llvm_unreachable("Unexpected sign size");
6326 
6327   // Get the signbit at the right position for MagAsInt.
6328   int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6329   if (ShiftAmount > 0) {
6330     SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6331                             DAG.getConstant(ShiftAmount, DL, XLenVT));
6332   } else if (ShiftAmount < 0) {
6333     SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6334                             DAG.getConstant(-ShiftAmount, DL, XLenVT));
6335   }
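  // For example, when copying the sign of an f32 onto an f16 magnitude,
  // SignSize is 32 and Mag is 16 bits, so SignAsInt is shifted right by 16 to
  // line the sign bit up with bit 15.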
6336 
6337   // Mask the sign bit and any bits above it. The extra bits will be dropped
6338   // when we convert back to FP.
6339   SDValue SignMask = DAG.getConstant(
6340       APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6341   SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6342 
6343   // Transform Mag value to integer, and clear the sign bit.
6344   SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6345   SDValue ClearSignMask = DAG.getConstant(
6346       APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6347   SDValue ClearedSign =
6348       DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6349 
6350   SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6351                                    SDNodeFlags::Disjoint);
6352 
6353   return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6354 }
6355 
6356 /// Get the RISC-V target-specific VL op for a given SDNode.
6357 static unsigned getRISCVVLOp(SDValue Op) {
6358 #define OP_CASE(NODE)                                                          \
6359   case ISD::NODE:                                                              \
6360     return RISCVISD::NODE##_VL;
6361 #define VP_CASE(NODE)                                                          \
6362   case ISD::VP_##NODE:                                                         \
6363     return RISCVISD::NODE##_VL;
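  // For example, OP_CASE(ADD) expands to "case ISD::ADD: return
  // RISCVISD::ADD_VL;" and VP_CASE(ADD) to "case ISD::VP_ADD: return
  // RISCVISD::ADD_VL;".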
6364   // clang-format off
6365   switch (Op.getOpcode()) {
6366   default:
6367     llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6368   OP_CASE(ADD)
6369   OP_CASE(SUB)
6370   OP_CASE(MUL)
6371   OP_CASE(MULHS)
6372   OP_CASE(MULHU)
6373   OP_CASE(SDIV)
6374   OP_CASE(SREM)
6375   OP_CASE(UDIV)
6376   OP_CASE(UREM)
6377   OP_CASE(SHL)
6378   OP_CASE(SRA)
6379   OP_CASE(SRL)
6380   OP_CASE(ROTL)
6381   OP_CASE(ROTR)
6382   OP_CASE(BSWAP)
6383   OP_CASE(CTTZ)
6384   OP_CASE(CTLZ)
6385   OP_CASE(CTPOP)
6386   OP_CASE(BITREVERSE)
6387   OP_CASE(SADDSAT)
6388   OP_CASE(UADDSAT)
6389   OP_CASE(SSUBSAT)
6390   OP_CASE(USUBSAT)
6391   OP_CASE(AVGFLOORS)
6392   OP_CASE(AVGFLOORU)
6393   OP_CASE(AVGCEILS)
6394   OP_CASE(AVGCEILU)
6395   OP_CASE(FADD)
6396   OP_CASE(FSUB)
6397   OP_CASE(FMUL)
6398   OP_CASE(FDIV)
6399   OP_CASE(FNEG)
6400   OP_CASE(FABS)
6401   OP_CASE(FSQRT)
6402   OP_CASE(SMIN)
6403   OP_CASE(SMAX)
6404   OP_CASE(UMIN)
6405   OP_CASE(UMAX)
6406   OP_CASE(STRICT_FADD)
6407   OP_CASE(STRICT_FSUB)
6408   OP_CASE(STRICT_FMUL)
6409   OP_CASE(STRICT_FDIV)
6410   OP_CASE(STRICT_FSQRT)
6411   VP_CASE(ADD)        // VP_ADD
6412   VP_CASE(SUB)        // VP_SUB
6413   VP_CASE(MUL)        // VP_MUL
6414   VP_CASE(SDIV)       // VP_SDIV
6415   VP_CASE(SREM)       // VP_SREM
6416   VP_CASE(UDIV)       // VP_UDIV
6417   VP_CASE(UREM)       // VP_UREM
6418   VP_CASE(SHL)        // VP_SHL
6419   VP_CASE(FADD)       // VP_FADD
6420   VP_CASE(FSUB)       // VP_FSUB
6421   VP_CASE(FMUL)       // VP_FMUL
6422   VP_CASE(FDIV)       // VP_FDIV
6423   VP_CASE(FNEG)       // VP_FNEG
6424   VP_CASE(FABS)       // VP_FABS
6425   VP_CASE(SMIN)       // VP_SMIN
6426   VP_CASE(SMAX)       // VP_SMAX
6427   VP_CASE(UMIN)       // VP_UMIN
6428   VP_CASE(UMAX)       // VP_UMAX
6429   VP_CASE(FCOPYSIGN)  // VP_FCOPYSIGN
6430   VP_CASE(SETCC)      // VP_SETCC
6431   VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6432   VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6433   VP_CASE(BITREVERSE) // VP_BITREVERSE
6434   VP_CASE(SADDSAT)    // VP_SADDSAT
6435   VP_CASE(UADDSAT)    // VP_UADDSAT
6436   VP_CASE(SSUBSAT)    // VP_SSUBSAT
6437   VP_CASE(USUBSAT)    // VP_USUBSAT
6438   VP_CASE(BSWAP)      // VP_BSWAP
6439   VP_CASE(CTLZ)       // VP_CTLZ
6440   VP_CASE(CTTZ)       // VP_CTTZ
6441   VP_CASE(CTPOP)      // VP_CTPOP
6442   case ISD::CTLZ_ZERO_UNDEF:
6443   case ISD::VP_CTLZ_ZERO_UNDEF:
6444     return RISCVISD::CTLZ_VL;
6445   case ISD::CTTZ_ZERO_UNDEF:
6446   case ISD::VP_CTTZ_ZERO_UNDEF:
6447     return RISCVISD::CTTZ_VL;
6448   case ISD::FMA:
6449   case ISD::VP_FMA:
6450     return RISCVISD::VFMADD_VL;
6451   case ISD::STRICT_FMA:
6452     return RISCVISD::STRICT_VFMADD_VL;
6453   case ISD::AND:
6454   case ISD::VP_AND:
6455     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6456       return RISCVISD::VMAND_VL;
6457     return RISCVISD::AND_VL;
6458   case ISD::OR:
6459   case ISD::VP_OR:
6460     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6461       return RISCVISD::VMOR_VL;
6462     return RISCVISD::OR_VL;
6463   case ISD::XOR:
6464   case ISD::VP_XOR:
6465     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6466       return RISCVISD::VMXOR_VL;
6467     return RISCVISD::XOR_VL;
6468   case ISD::VP_SELECT:
6469   case ISD::VP_MERGE:
6470     return RISCVISD::VMERGE_VL;
6471   case ISD::VP_SRA:
6472     return RISCVISD::SRA_VL;
6473   case ISD::VP_SRL:
6474     return RISCVISD::SRL_VL;
6475   case ISD::VP_SQRT:
6476     return RISCVISD::FSQRT_VL;
6477   case ISD::VP_SIGN_EXTEND:
6478     return RISCVISD::VSEXT_VL;
6479   case ISD::VP_ZERO_EXTEND:
6480     return RISCVISD::VZEXT_VL;
6481   case ISD::VP_FP_TO_SINT:
6482     return RISCVISD::VFCVT_RTZ_X_F_VL;
6483   case ISD::VP_FP_TO_UINT:
6484     return RISCVISD::VFCVT_RTZ_XU_F_VL;
6485   case ISD::FMINNUM:
6486   case ISD::VP_FMINNUM:
6487     return RISCVISD::VFMIN_VL;
6488   case ISD::FMAXNUM:
6489   case ISD::VP_FMAXNUM:
6490     return RISCVISD::VFMAX_VL;
6491   case ISD::LRINT:
6492   case ISD::VP_LRINT:
6493   case ISD::LLRINT:
6494   case ISD::VP_LLRINT:
6495     return RISCVISD::VFCVT_RM_X_F_VL;
6496   }
6497   // clang-format on
6498 #undef OP_CASE
6499 #undef VP_CASE
6500 }
6501 
6502 /// Return true if a RISC-V target-specific op has a passthru operand.
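/// The passthru operand provides the result values for lanes the operation
/// leaves unmodified (it acts as the merge operand of the instruction).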
6503 static bool hasPassthruOp(unsigned Opcode) {
6504   assert(Opcode > RISCVISD::FIRST_NUMBER &&
6505          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
6506          "not a RISC-V target specific op");
6507   static_assert(
6508       RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
6509       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
6510       "adding target specific op should update this function");
6511   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6512     return true;
6513   if (Opcode == RISCVISD::FCOPYSIGN_VL)
6514     return true;
6515   if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6516     return true;
6517   if (Opcode == RISCVISD::SETCC_VL)
6518     return true;
6519   if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6520     return true;
6521   if (Opcode == RISCVISD::VMERGE_VL)
6522     return true;
6523   return false;
6524 }
6525 
6526 /// Return true if a RISC-V target-specific op has a mask operand.
6527 static bool hasMaskOp(unsigned Opcode) {
6528   assert(Opcode > RISCVISD::FIRST_NUMBER &&
6529          Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
6530          "not a RISC-V target specific op");
6531   static_assert(
6532       RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
6533       RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
6534       "adding target specific op should update this function");
6535   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6536     return true;
6537   if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6538     return true;
6539   if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6540       Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
6541     return true;
6542   return false;
6543 }
6544 
6545 static bool isPromotedOpNeedingSplit(SDValue Op,
6546                                      const RISCVSubtarget &Subtarget) {
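  // Promoting nxv32[b]f16 to f32 would require nxv32f32, which is wider than
  // the largest legal RVV type (LMUL=8), so ops that need promotion are split
  // in half first.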
6547   if (Op.getValueType() == MVT::nxv32f16 &&
6548       (Subtarget.hasVInstructionsF16Minimal() &&
6549        !Subtarget.hasVInstructionsF16()))
6550     return true;
6551   if (Op.getValueType() == MVT::nxv32bf16)
6552     return true;
6553   return false;
6554 }
6555 
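/// Split a vector operation in half: split each vector operand into low and
/// high halves, perform the operation on each half, and concatenate the
/// results. Non-vector operands are reused unchanged by both halves.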
6556 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6557   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6558   SDLoc DL(Op);
6559 
6560   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6561   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6562 
6563   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6564     if (!Op.getOperand(j).getValueType().isVector()) {
6565       LoOperands[j] = Op.getOperand(j);
6566       HiOperands[j] = Op.getOperand(j);
6567       continue;
6568     }
6569     std::tie(LoOperands[j], HiOperands[j]) =
6570         DAG.SplitVector(Op.getOperand(j), DL);
6571   }
6572 
6573   SDValue LoRes =
6574       DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6575   SDValue HiRes =
6576       DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6577 
6578   return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6579 }
6580 
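/// Split a VP operation like SplitVectorOp, additionally splitting the
/// explicit vector length (EVL) operand between the two halves.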
6581 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6582   assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6583   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6584   SDLoc DL(Op);
6585 
6586   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6587   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6588 
6589   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6590     if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6591       std::tie(LoOperands[j], HiOperands[j]) =
6592           DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6593       continue;
6594     }
6595     if (!Op.getOperand(j).getValueType().isVector()) {
6596       LoOperands[j] = Op.getOperand(j);
6597       HiOperands[j] = Op.getOperand(j);
6598       continue;
6599     }
6600     std::tie(LoOperands[j], HiOperands[j]) =
6601         DAG.SplitVector(Op.getOperand(j), DL);
6602   }
6603 
6604   SDValue LoRes =
6605       DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6606   SDValue HiRes =
6607       DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6608 
6609   return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6610 }
6611 
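/// Split a VP reduction in half: reduce the low half first, then use that
/// result as the start value for the reduction of the high half.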
6612 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6613   SDLoc DL(Op);
6614 
6615   auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6616   auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6617   auto [EVLLo, EVLHi] =
6618       DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6619 
6620   SDValue ResLo =
6621       DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6622                   {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6623   return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6624                      {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6625 }
6626 
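/// Split a strict-FP vector operation in half, chaining the high-half
/// operation after the low-half one and merging the two output chains.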
6627 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6628 
6629   assert(Op->isStrictFPOpcode());
6630 
6631   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6632 
6633   SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6634   SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6635 
6636   SDLoc DL(Op);
6637 
6638   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6639   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6640 
6641   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6642     if (!Op.getOperand(j).getValueType().isVector()) {
6643       LoOperands[j] = Op.getOperand(j);
6644       HiOperands[j] = Op.getOperand(j);
6645       continue;
6646     }
6647     std::tie(LoOperands[j], HiOperands[j]) =
6648         DAG.SplitVector(Op.getOperand(j), DL);
6649   }
6650 
6651   SDValue LoRes =
6652       DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6653   HiOperands[0] = LoRes.getValue(1);
6654   SDValue HiRes =
6655       DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6656 
6657   SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6658                           LoRes.getValue(0), HiRes.getValue(0));
6659   return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6660 }
6661 
6662 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6663                                             SelectionDAG &DAG) const {
6664   switch (Op.getOpcode()) {
6665   default:
6666     report_fatal_error("unimplemented operand");
6667   case ISD::ATOMIC_FENCE:
6668     return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6669   case ISD::GlobalAddress:
6670     return lowerGlobalAddress(Op, DAG);
6671   case ISD::BlockAddress:
6672     return lowerBlockAddress(Op, DAG);
6673   case ISD::ConstantPool:
6674     return lowerConstantPool(Op, DAG);
6675   case ISD::JumpTable:
6676     return lowerJumpTable(Op, DAG);
6677   case ISD::GlobalTLSAddress:
6678     return lowerGlobalTLSAddress(Op, DAG);
6679   case ISD::Constant:
6680     return lowerConstant(Op, DAG, Subtarget);
6681   case ISD::ConstantFP:
6682     return lowerConstantFP(Op, DAG);
6683   case ISD::SELECT:
6684     return lowerSELECT(Op, DAG);
6685   case ISD::BRCOND:
6686     return lowerBRCOND(Op, DAG);
6687   case ISD::VASTART:
6688     return lowerVASTART(Op, DAG);
6689   case ISD::FRAMEADDR:
6690     return lowerFRAMEADDR(Op, DAG);
6691   case ISD::RETURNADDR:
6692     return lowerRETURNADDR(Op, DAG);
6693   case ISD::SHL_PARTS:
6694     return lowerShiftLeftParts(Op, DAG);
6695   case ISD::SRA_PARTS:
6696     return lowerShiftRightParts(Op, DAG, true);
6697   case ISD::SRL_PARTS:
6698     return lowerShiftRightParts(Op, DAG, false);
6699   case ISD::ROTL:
6700   case ISD::ROTR:
6701     if (Op.getValueType().isFixedLengthVector()) {
6702       assert(Subtarget.hasStdExtZvkb());
6703       return lowerToScalableOp(Op, DAG);
6704     }
6705     assert(Subtarget.hasVendorXTHeadBb() &&
6706            !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6707            "Unexpected custom legalization");
6708     // XTHeadBb only supports rotate by constant.
6709     if (!isa<ConstantSDNode>(Op.getOperand(1)))
6710       return SDValue();
6711     return Op;
6712   case ISD::BITCAST: {
6713     SDLoc DL(Op);
6714     EVT VT = Op.getValueType();
6715     SDValue Op0 = Op.getOperand(0);
6716     EVT Op0VT = Op0.getValueType();
6717     MVT XLenVT = Subtarget.getXLenVT();
6718     if (Op0VT == MVT::i16 &&
6719         ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6720          (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6721       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6722       return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6723     }
6724     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6725         Subtarget.hasStdExtFOrZfinx()) {
6726       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6727       return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6728     }
6729     if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6730         Subtarget.hasStdExtDOrZdinx()) {
6731       SDValue Lo, Hi;
6732       std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6733       return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6734     }
6735 
6736     // Consider other scalar<->scalar casts as legal if the types are legal.
6737     // Otherwise expand them.
6738     if (!VT.isVector() && !Op0VT.isVector()) {
6739       if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6740         return Op;
6741       return SDValue();
6742     }
6743 
6744     assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6745            "Unexpected types");
6746 
6747     if (VT.isFixedLengthVector()) {
6748       // We can handle fixed length vector bitcasts with a simple replacement
6749       // in isel.
6750       if (Op0VT.isFixedLengthVector())
6751         return Op;
6752       // When bitcasting from scalar to fixed-length vector, insert the scalar
6753       // into a one-element vector of the result type, and perform a vector
6754       // bitcast.
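      // For example, an i32 -> v2f16 bitcast becomes an insert of the i32
      // into a v1i32 vector followed by a v1i32 -> v2f16 bitcast.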
6755       if (!Op0VT.isVector()) {
6756         EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6757         if (!isTypeLegal(BVT))
6758           return SDValue();
6759         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6760                                               DAG.getUNDEF(BVT), Op0,
6761                                               DAG.getVectorIdxConstant(0, DL)));
6762       }
6763       return SDValue();
6764     }
6765     // Custom-legalize bitcasts from fixed-length vector types to scalar types
6766     // as follows: bitcast the vector to a one-element vector type whose element
6767     // type is the same as the result type, and extract the first element.
6768     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6769       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6770       if (!isTypeLegal(BVT))
6771         return SDValue();
6772       SDValue BVec = DAG.getBitcast(BVT, Op0);
6773       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6774                          DAG.getVectorIdxConstant(0, DL));
6775     }
6776     return SDValue();
6777   }
6778   case ISD::INTRINSIC_WO_CHAIN:
6779     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6780   case ISD::INTRINSIC_W_CHAIN:
6781     return LowerINTRINSIC_W_CHAIN(Op, DAG);
6782   case ISD::INTRINSIC_VOID:
6783     return LowerINTRINSIC_VOID(Op, DAG);
6784   case ISD::IS_FPCLASS:
6785     return LowerIS_FPCLASS(Op, DAG);
6786   case ISD::BITREVERSE: {
6787     MVT VT = Op.getSimpleValueType();
6788     if (VT.isFixedLengthVector()) {
6789       assert(Subtarget.hasStdExtZvbb());
6790       return lowerToScalableOp(Op, DAG);
6791     }
6792     SDLoc DL(Op);
6793     assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6794     assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6795     // Expand bitreverse to a bswap(rev8) followed by brev8.
6796     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6797     return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6798   }
6799   case ISD::TRUNCATE:
6800   case ISD::TRUNCATE_SSAT_S:
6801   case ISD::TRUNCATE_USAT_U:
6802     // Only custom-lower vector truncates.
6803     if (!Op.getSimpleValueType().isVector())
6804       return Op;
6805     return lowerVectorTruncLike(Op, DAG);
6806   case ISD::ANY_EXTEND:
6807   case ISD::ZERO_EXTEND:
6808     if (Op.getOperand(0).getValueType().isVector() &&
6809         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6810       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6811     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6812   case ISD::SIGN_EXTEND:
6813     if (Op.getOperand(0).getValueType().isVector() &&
6814         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6815       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6816     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6817   case ISD::SPLAT_VECTOR_PARTS:
6818     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6819   case ISD::INSERT_VECTOR_ELT:
6820     return lowerINSERT_VECTOR_ELT(Op, DAG);
6821   case ISD::EXTRACT_VECTOR_ELT:
6822     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6823   case ISD::SCALAR_TO_VECTOR: {
6824     MVT VT = Op.getSimpleValueType();
6825     SDLoc DL(Op);
6826     SDValue Scalar = Op.getOperand(0);
6827     if (VT.getVectorElementType() == MVT::i1) {
6828       MVT WideVT = VT.changeVectorElementType(MVT::i8);
6829       SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6830       return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6831     }
6832     MVT ContainerVT = VT;
6833     if (VT.isFixedLengthVector())
6834       ContainerVT = getContainerForFixedLengthVector(VT);
6835     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6836 
6837     SDValue V;
6838     if (VT.isFloatingPoint()) {
6839       V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6840                       DAG.getUNDEF(ContainerVT), Scalar, VL);
6841     } else {
6842       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6843       V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6844                       DAG.getUNDEF(ContainerVT), Scalar, VL);
6845     }
6846     if (VT.isFixedLengthVector())
6847       V = convertFromScalableVector(VT, V, DAG, Subtarget);
6848     return V;
6849   }
6850   case ISD::VSCALE: {
6851     MVT XLenVT = Subtarget.getXLenVT();
6852     MVT VT = Op.getSimpleValueType();
6853     SDLoc DL(Op);
6854     SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6855     // We define our scalable vector types for LMUL=1 to use a 64-bit known
6856     // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
6857     // vscale as VLENB / 8.
6858     static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6859     if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6860       report_fatal_error("Support for VLEN==32 is incomplete.");
6861     // We assume VLENB is a multiple of 8. We manually choose the best shift
6862     // here because SimplifyDemandedBits isn't always able to simplify it.
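    // For example, Val == 4 shifts VLENB right by 1, Val == 16 shifts it left
    // by 1, Val == 24 multiplies VLENB by 3, and any other value falls back to
    // (VLENB >> 3) * Val.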
6863     uint64_t Val = Op.getConstantOperandVal(0);
6864     if (isPowerOf2_64(Val)) {
6865       uint64_t Log2 = Log2_64(Val);
6866       if (Log2 < 3)
6867         Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6868                           DAG.getConstant(3 - Log2, DL, VT));
6869       else if (Log2 > 3)
6870         Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6871                           DAG.getConstant(Log2 - 3, DL, XLenVT));
6872     } else if ((Val % 8) == 0) {
6873       // If the multiplier is a multiple of 8, scale it down to avoid needing
6874       // to shift the VLENB value.
6875       Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6876                         DAG.getConstant(Val / 8, DL, XLenVT));
6877     } else {
6878       SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6879                                    DAG.getConstant(3, DL, XLenVT));
6880       Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6881                         DAG.getConstant(Val, DL, XLenVT));
6882     }
6883     return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6884   }
6885   case ISD::FPOWI: {
6886     // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6887     // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6888     if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6889         Op.getOperand(1).getValueType() == MVT::i32) {
6890       SDLoc DL(Op);
6891       SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6892       SDValue Powi =
6893           DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6894       return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6895                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6896     }
6897     return SDValue();
6898   }
6899   case ISD::FMAXIMUM:
6900   case ISD::FMINIMUM:
6901     if (isPromotedOpNeedingSplit(Op, Subtarget))
6902       return SplitVectorOp(Op, DAG);
6903     return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6904   case ISD::FP_EXTEND:
6905   case ISD::FP_ROUND:
6906     return lowerVectorFPExtendOrRoundLike(Op, DAG);
6907   case ISD::STRICT_FP_ROUND:
6908   case ISD::STRICT_FP_EXTEND:
6909     return lowerStrictFPExtendOrRoundLike(Op, DAG);
6910   case ISD::SINT_TO_FP:
6911   case ISD::UINT_TO_FP:
6912     if (Op.getValueType().isVector() &&
6913         ((Op.getValueType().getScalarType() == MVT::f16 &&
6914           (Subtarget.hasVInstructionsF16Minimal() &&
6915            !Subtarget.hasVInstructionsF16())) ||
6916          Op.getValueType().getScalarType() == MVT::bf16)) {
6917       if (isPromotedOpNeedingSplit(Op, Subtarget))
6918         return SplitVectorOp(Op, DAG);
6919       // int -> f32
6920       SDLoc DL(Op);
6921       MVT NVT =
6922           MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6923       SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6924       // f32 -> [b]f16
6925       return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6926                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6927     }
6928     [[fallthrough]];
6929   case ISD::FP_TO_SINT:
6930   case ISD::FP_TO_UINT:
6931     if (SDValue Op1 = Op.getOperand(0);
6932         Op1.getValueType().isVector() &&
6933         ((Op1.getValueType().getScalarType() == MVT::f16 &&
6934           (Subtarget.hasVInstructionsF16Minimal() &&
6935            !Subtarget.hasVInstructionsF16())) ||
6936          Op1.getValueType().getScalarType() == MVT::bf16)) {
6937       if (isPromotedOpNeedingSplit(Op1, Subtarget))
6938         return SplitVectorOp(Op, DAG);
6939       // [b]f16 -> f32
6940       SDLoc DL(Op);
6941       MVT NVT = MVT::getVectorVT(MVT::f32,
6942                                  Op1.getValueType().getVectorElementCount());
6943       SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6944       // f32 -> int
6945       return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6946     }
6947     [[fallthrough]];
6948   case ISD::STRICT_FP_TO_SINT:
6949   case ISD::STRICT_FP_TO_UINT:
6950   case ISD::STRICT_SINT_TO_FP:
6951   case ISD::STRICT_UINT_TO_FP: {
6952     // RVV can only do fp<->int conversions to types half or double the size
6953     // of the source. We custom-lower any conversion that needs two hops into
6954     // a sequence of single-hop conversions.
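    // For example, i8 -> f32 is lowered as i8 -> i16 (extend) then i16 -> f32,
    // and f64 -> i8 as f64 -> i32 (fp_to_int) then an i32 -> i8 truncate.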
6955     MVT VT = Op.getSimpleValueType();
6956     if (VT.isScalarInteger())
6957       return lowerFP_TO_INT(Op, DAG, Subtarget);
6958     bool IsStrict = Op->isStrictFPOpcode();
6959     SDValue Src = Op.getOperand(0 + IsStrict);
6960     MVT SrcVT = Src.getSimpleValueType();
6961     if (SrcVT.isScalarInteger())
6962       return lowerINT_TO_FP(Op, DAG, Subtarget);
6963     if (!VT.isVector())
6964       return Op;
6965     SDLoc DL(Op);
6966     MVT EltVT = VT.getVectorElementType();
6967     MVT SrcEltVT = SrcVT.getVectorElementType();
6968     unsigned EltSize = EltVT.getSizeInBits();
6969     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6970     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6971            "Unexpected vector element types");
6972 
6973     bool IsInt2FP = SrcEltVT.isInteger();
6974     // Widening conversions
6975     if (EltSize > (2 * SrcEltSize)) {
6976       if (IsInt2FP) {
6977         // Do a regular integer sign/zero extension then convert to float.
6978         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6979                                       VT.getVectorElementCount());
6980         unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6981                               Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6982                                  ? ISD::ZERO_EXTEND
6983                                  : ISD::SIGN_EXTEND;
6984         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6985         if (IsStrict)
6986           return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6987                              Op.getOperand(0), Ext);
6988         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6989       }
6990       // FP2Int
6991       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6992       // Do one doubling fp_extend then complete the operation by converting
6993       // to int.
6994       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6995       if (IsStrict) {
6996         auto [FExt, Chain] =
6997             DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6998         return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6999       }
7000       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7001       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7002     }
7003 
7004     // Narrowing conversions
7005     if (SrcEltSize > (2 * EltSize)) {
7006       if (IsInt2FP) {
7007         // One narrowing int_to_fp, then an fp_round.
7008         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7009         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7010         if (IsStrict) {
7011           SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7012                                        DAG.getVTList(InterimFVT, MVT::Other),
7013                                        Op.getOperand(0), Src);
7014           SDValue Chain = Int2FP.getValue(1);
7015           return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7016         }
7017         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7018         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7019       }
7020       // FP2Int
7021       // One narrowing fp_to_int, then truncate the integer. If the float isn't
7022       // representable by the integer, the result is poison.
7023       MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7024                                     VT.getVectorElementCount());
7025       if (IsStrict) {
7026         SDValue FP2Int =
7027             DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7028                         Op.getOperand(0), Src);
7029         SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7030         return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7031       }
7032       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7033       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7034     }
7035 
7036     // Scalable vectors can exit here. Patterns will handle equally-sized
7037     // conversions as well as the halving/doubling ones.
7038     if (!VT.isFixedLengthVector())
7039       return Op;
7040 
7041     // For fixed-length vectors we lower to a custom "VL" node.
7042     unsigned RVVOpc = 0;
7043     switch (Op.getOpcode()) {
7044     default:
7045       llvm_unreachable("Impossible opcode");
7046     case ISD::FP_TO_SINT:
7047       RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7048       break;
7049     case ISD::FP_TO_UINT:
7050       RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7051       break;
7052     case ISD::SINT_TO_FP:
7053       RVVOpc = RISCVISD::SINT_TO_FP_VL;
7054       break;
7055     case ISD::UINT_TO_FP:
7056       RVVOpc = RISCVISD::UINT_TO_FP_VL;
7057       break;
7058     case ISD::STRICT_FP_TO_SINT:
7059       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7060       break;
7061     case ISD::STRICT_FP_TO_UINT:
7062       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7063       break;
7064     case ISD::STRICT_SINT_TO_FP:
7065       RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7066       break;
7067     case ISD::STRICT_UINT_TO_FP:
7068       RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7069       break;
7070     }
7071 
7072     MVT ContainerVT = getContainerForFixedLengthVector(VT);
7073     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7074     assert(ContainerVT.getVectorElementCount() ==
                    SrcContainerVT.getVectorElementCount() &&
7075            "Expected same element count");
7076 
7077     auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7078 
7079     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7080     if (IsStrict) {
7081       Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7082                         Op.getOperand(0), Src, Mask, VL);
7083       SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7084       return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7085     }
7086     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7087     return convertFromScalableVector(VT, Src, DAG, Subtarget);
7088   }
7089   case ISD::FP_TO_SINT_SAT:
7090   case ISD::FP_TO_UINT_SAT:
7091     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7092   case ISD::FP_TO_BF16: {
7093     // Custom lower to ensure the libcall return is passed in an FPR on hard
7094     // float ABIs.
7095     assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7096     SDLoc DL(Op);
7097     MakeLibCallOptions CallOptions;
7098     RTLIB::Libcall LC =
7099         RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7100     SDValue Res =
7101         makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7102     if (Subtarget.is64Bit())
7103       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7104     return DAG.getBitcast(MVT::i32, Res);
7105   }
7106   case ISD::BF16_TO_FP: {
7107     assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7108     MVT VT = Op.getSimpleValueType();
7109     SDLoc DL(Op);
7110     Op = DAG.getNode(
7111         ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7112         DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7113     SDValue Res = Subtarget.is64Bit()
7114                       ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7115                       : DAG.getBitcast(MVT::f32, Op);
7116     // fp_extend if the target VT is bigger than f32.
7117     if (VT != MVT::f32)
7118       return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7119     return Res;
7120   }
7121   case ISD::STRICT_FP_TO_FP16:
7122   case ISD::FP_TO_FP16: {
7123     // Custom lower to ensure the libcall return is passed in an FPR on hard
7124     // float ABIs.
7125     assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7126     SDLoc DL(Op);
7127     MakeLibCallOptions CallOptions;
7128     bool IsStrict = Op->isStrictFPOpcode();
7129     SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7130     SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7131     RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7132     SDValue Res;
7133     std::tie(Res, Chain) =
7134         makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7135     if (Subtarget.is64Bit())
7136       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7137     SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7138     if (IsStrict)
7139       return DAG.getMergeValues({Result, Chain}, DL);
7140     return Result;
7141   }
7142   case ISD::STRICT_FP16_TO_FP:
7143   case ISD::FP16_TO_FP: {
7144     // Custom lower to ensure the libcall argument is passed in an FPR on hard
7145     // float ABIs.
7146     assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7147     SDLoc DL(Op);
7148     MakeLibCallOptions CallOptions;
7149     bool IsStrict = Op->isStrictFPOpcode();
7150     SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7151     SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7152     SDValue Arg = Subtarget.is64Bit()
7153                       ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7154                       : DAG.getBitcast(MVT::f32, Op0);
7155     SDValue Res;
7156     std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7157                                        CallOptions, DL, Chain);
7158     if (IsStrict)
7159       return DAG.getMergeValues({Res, Chain}, DL);
7160     return Res;
7161   }
7162   case ISD::FTRUNC:
7163   case ISD::FCEIL:
7164   case ISD::FFLOOR:
7165   case ISD::FNEARBYINT:
7166   case ISD::FRINT:
7167   case ISD::FROUND:
7168   case ISD::FROUNDEVEN:
7169     if (isPromotedOpNeedingSplit(Op, Subtarget))
7170       return SplitVectorOp(Op, DAG);
7171     return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7172   case ISD::LRINT:
7173   case ISD::LLRINT:
7174     if (Op.getValueType().isVector())
7175       return lowerVectorXRINT(Op, DAG, Subtarget);
7176     [[fallthrough]];
7177   case ISD::LROUND:
7178   case ISD::LLROUND: {
7179     assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7180            "Unexpected custom legalisation");
7181     SDLoc DL(Op);
7182     SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7183     return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7184   }
7185   case ISD::STRICT_LRINT:
7186   case ISD::STRICT_LLRINT:
7187   case ISD::STRICT_LROUND:
7188   case ISD::STRICT_LLROUND: {
7189     assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7190            "Unexpected custom legalisation");
7191     SDLoc DL(Op);
7192     SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7193                               {Op.getOperand(0), Op.getOperand(1)});
7194     return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7195                        {Ext.getValue(1), Ext.getValue(0)});
7196   }
7197   case ISD::VECREDUCE_ADD:
7198   case ISD::VECREDUCE_UMAX:
7199   case ISD::VECREDUCE_SMAX:
7200   case ISD::VECREDUCE_UMIN:
7201   case ISD::VECREDUCE_SMIN:
7202     return lowerVECREDUCE(Op, DAG);
7203   case ISD::VECREDUCE_AND:
7204   case ISD::VECREDUCE_OR:
7205   case ISD::VECREDUCE_XOR:
7206     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7207       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7208     return lowerVECREDUCE(Op, DAG);
7209   case ISD::VECREDUCE_FADD:
7210   case ISD::VECREDUCE_SEQ_FADD:
7211   case ISD::VECREDUCE_FMIN:
7212   case ISD::VECREDUCE_FMAX:
7213   case ISD::VECREDUCE_FMAXIMUM:
7214   case ISD::VECREDUCE_FMINIMUM:
7215     return lowerFPVECREDUCE(Op, DAG);
7216   case ISD::VP_REDUCE_ADD:
7217   case ISD::VP_REDUCE_UMAX:
7218   case ISD::VP_REDUCE_SMAX:
7219   case ISD::VP_REDUCE_UMIN:
7220   case ISD::VP_REDUCE_SMIN:
7221   case ISD::VP_REDUCE_FADD:
7222   case ISD::VP_REDUCE_SEQ_FADD:
7223   case ISD::VP_REDUCE_FMIN:
7224   case ISD::VP_REDUCE_FMAX:
7225   case ISD::VP_REDUCE_FMINIMUM:
7226   case ISD::VP_REDUCE_FMAXIMUM:
7227     if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7228       return SplitVectorReductionOp(Op, DAG);
7229     return lowerVPREDUCE(Op, DAG);
7230   case ISD::VP_REDUCE_AND:
7231   case ISD::VP_REDUCE_OR:
7232   case ISD::VP_REDUCE_XOR:
7233     if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7234       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7235     return lowerVPREDUCE(Op, DAG);
7236   case ISD::VP_CTTZ_ELTS:
7237   case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7238     return lowerVPCttzElements(Op, DAG);
7239   case ISD::UNDEF: {
7240     MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7241     return convertFromScalableVector(Op.getSimpleValueType(),
7242                                      DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7243   }
7244   case ISD::INSERT_SUBVECTOR:
7245     return lowerINSERT_SUBVECTOR(Op, DAG);
7246   case ISD::EXTRACT_SUBVECTOR:
7247     return lowerEXTRACT_SUBVECTOR(Op, DAG);
7248   case ISD::VECTOR_DEINTERLEAVE:
7249     return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7250   case ISD::VECTOR_INTERLEAVE:
7251     return lowerVECTOR_INTERLEAVE(Op, DAG);
7252   case ISD::STEP_VECTOR:
7253     return lowerSTEP_VECTOR(Op, DAG);
7254   case ISD::VECTOR_REVERSE:
7255     return lowerVECTOR_REVERSE(Op, DAG);
7256   case ISD::VECTOR_SPLICE:
7257     return lowerVECTOR_SPLICE(Op, DAG);
7258   case ISD::BUILD_VECTOR:
7259     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7260   case ISD::SPLAT_VECTOR: {
7261     MVT VT = Op.getSimpleValueType();
7262     MVT EltVT = VT.getVectorElementType();
7263     if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7264         EltVT == MVT::bf16) {
7265       SDLoc DL(Op);
7266       SDValue Elt;
7267       if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7268           (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7269         Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7270                           Op.getOperand(0));
7271       else
7272         Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7273       MVT IVT = VT.changeVectorElementType(MVT::i16);
7274       return DAG.getNode(ISD::BITCAST, DL, VT,
7275                          DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7276     }
7277 
7278     if (EltVT == MVT::i1)
7279       return lowerVectorMaskSplat(Op, DAG);
7280     return SDValue();
7281   }
7282   case ISD::VECTOR_SHUFFLE:
7283     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7284   case ISD::CONCAT_VECTORS: {
7285     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7286     // better than going through the stack, as the default expansion does.
7287     SDLoc DL(Op);
7288     MVT VT = Op.getSimpleValueType();
7289     MVT ContainerVT = VT;
7290     if (VT.isFixedLengthVector())
7291       ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7292 
7293     // Recursively split concat_vectors with more than 2 operands:
7294     //
7295     // concat_vector op1, op2, op3, op4
7296     // ->
7297     // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7298     //
7299     // This reduces the length of the chain of vslideups and allows us to
7300     // perform the vslideups at a smaller LMUL, limited to MF2.
7301     if (Op.getNumOperands() > 2 &&
7302         ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7303       MVT HalfVT = VT.getHalfNumVectorElementsVT();
7304       assert(isPowerOf2_32(Op.getNumOperands()));
7305       size_t HalfNumOps = Op.getNumOperands() / 2;
7306       SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7307                                Op->ops().take_front(HalfNumOps));
7308       SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7309                                Op->ops().drop_front(HalfNumOps));
7310       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7311     }
7312 
7313     unsigned NumOpElts =
7314         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7315     SDValue Vec = DAG.getUNDEF(VT);
7316     for (const auto &OpIdx : enumerate(Op->ops())) {
7317       SDValue SubVec = OpIdx.value();
7318       // Don't insert undef subvectors.
7319       if (SubVec.isUndef())
7320         continue;
7321       Vec =
7322           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7323                       DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7324     }
7325     return Vec;
7326   }
7327   case ISD::LOAD: {
7328     auto *Load = cast<LoadSDNode>(Op);
7329     EVT VecTy = Load->getMemoryVT();
7330     // Handle normal vector tuple load.
7331     if (VecTy.isRISCVVectorTuple()) {
7332       SDLoc DL(Op);
7333       MVT XLenVT = Subtarget.getXLenVT();
7334       unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7335       unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7336       unsigned NumElts = Sz / (NF * 8);
7337       int Log2LMUL = Log2_64(NumElts) - 3;
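      // The tuple is loaded as NF consecutive register groups; VROffset below
      // (VLENB scaled by LMUL, at least one full register) is the byte stride
      // between them in memory.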
7338 
7339       auto Flag = SDNodeFlags();
7340       Flag.setNoUnsignedWrap(true);
7341       SDValue Ret = DAG.getUNDEF(VecTy);
7342       SDValue BasePtr = Load->getBasePtr();
7343       SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7344       VROffset =
7345           DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7346                       DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7347       SmallVector<SDValue, 8> OutChains;
7348 
7349       // Load NF vector registers and combine them to a vector tuple.
7350       for (unsigned i = 0; i < NF; ++i) {
7351         SDValue LoadVal = DAG.getLoad(
7352             MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7353             BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7354         OutChains.push_back(LoadVal.getValue(1));
7355         Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7356                           DAG.getVectorIdxConstant(i, DL));
7357         BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7358       }
7359       return DAG.getMergeValues(
7360           {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7361     }
7362 
7363     if (auto V = expandUnalignedRVVLoad(Op, DAG))
7364       return V;
7365     if (Op.getValueType().isFixedLengthVector())
7366       return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7367     return Op;
7368   }
7369   case ISD::STORE: {
7370     auto *Store = cast<StoreSDNode>(Op);
7371     SDValue StoredVal = Store->getValue();
7372     EVT VecTy = StoredVal.getValueType();
7373     // Handle normal vector tuple store.
7374     if (VecTy.isRISCVVectorTuple()) {
7375       SDLoc DL(Op);
7376       MVT XLenVT = Subtarget.getXLenVT();
7377       unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7378       unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7379       unsigned NumElts = Sz / (NF * 8);
7380       int Log2LMUL = Log2_64(NumElts) - 3;
7381 
7382       auto Flag = SDNodeFlags();
7383       Flag.setNoUnsignedWrap(true);
7384       SDValue Ret;
7385       SDValue Chain = Store->getChain();
7386       SDValue BasePtr = Store->getBasePtr();
7387       SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7388       VROffset =
7389           DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7390                       DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7391 
7392       // Extract subregisters in a vector tuple and store them individually.
7393       for (unsigned i = 0; i < NF; ++i) {
7394         auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7395                                    MVT::getScalableVectorVT(MVT::i8, NumElts),
7396                                    StoredVal, DAG.getVectorIdxConstant(i, DL));
7397         Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7398                            MachinePointerInfo(Store->getAddressSpace()),
7399                            Store->getOriginalAlign(),
7400                            Store->getMemOperand()->getFlags());
7401         Chain = Ret.getValue(0);
7402         BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7403       }
7404       return Ret;
7405     }
7406 
7407     if (auto V = expandUnalignedRVVStore(Op, DAG))
7408       return V;
7409     if (Op.getOperand(1).getValueType().isFixedLengthVector())
7410       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7411     return Op;
7412   }
7413   case ISD::MLOAD:
7414   case ISD::VP_LOAD:
7415     return lowerMaskedLoad(Op, DAG);
7416   case ISD::MSTORE:
7417   case ISD::VP_STORE:
7418     return lowerMaskedStore(Op, DAG);
7419   case ISD::VECTOR_COMPRESS:
7420     return lowerVectorCompress(Op, DAG);
7421   case ISD::SELECT_CC: {
7422     // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7423     // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7424     // into separate SETCC+SELECT just like LegalizeDAG.
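    // That is:
    //   (select_cc x, y, cc, t, f) -> (select (setcc x, y, cc), t, f)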
7425     SDValue Tmp1 = Op.getOperand(0);
7426     SDValue Tmp2 = Op.getOperand(1);
7427     SDValue True = Op.getOperand(2);
7428     SDValue False = Op.getOperand(3);
7429     EVT VT = Op.getValueType();
7430     SDValue CC = Op.getOperand(4);
7431     EVT CmpVT = Tmp1.getValueType();
7432     EVT CCVT =
7433         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7434     SDLoc DL(Op);
7435     SDValue Cond =
7436         DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7437     return DAG.getSelect(DL, VT, Cond, True, False);
7438   }
7439   case ISD::SETCC: {
7440     MVT OpVT = Op.getOperand(0).getSimpleValueType();
7441     if (OpVT.isScalarInteger()) {
7442       MVT VT = Op.getSimpleValueType();
7443       SDValue LHS = Op.getOperand(0);
7444       SDValue RHS = Op.getOperand(1);
7445       ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7446       assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7447              "Unexpected CondCode");
7448 
7449       SDLoc DL(Op);
7450 
7451       // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7452       // convert this to the equivalent of (set(u)ge X, C+1) by using
7453       // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7454       // in a register.
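      // For example, (setugt X, 5) becomes (xori (sltiu X, 6), 1).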
7455       if (isa<ConstantSDNode>(RHS)) {
7456         int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7457         if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7458           // If this is an unsigned compare and the constant is -1, incrementing
7459           // the constant would change behavior. The result should be false.
7460           if (CCVal == ISD::SETUGT && Imm == -1)
7461             return DAG.getConstant(0, DL, VT);
7462           // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7463           CCVal = ISD::getSetCCSwappedOperands(CCVal);
7464           SDValue SetCC = DAG.getSetCC(
7465               DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7466           return DAG.getLogicalNOT(DL, SetCC, VT);
7467         }
7468       }
7469 
7470       // Not a constant we could handle, swap the operands and condition code to
7471       // SETLT/SETULT.
7472       CCVal = ISD::getSetCCSwappedOperands(CCVal);
7473       return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7474     }
7475 
7476     if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7477       return SplitVectorOp(Op, DAG);
7478 
7479     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7480   }
7481   case ISD::ADD:
7482   case ISD::SUB:
7483   case ISD::MUL:
7484   case ISD::MULHS:
7485   case ISD::MULHU:
7486   case ISD::AND:
7487   case ISD::OR:
7488   case ISD::XOR:
7489   case ISD::SDIV:
7490   case ISD::SREM:
7491   case ISD::UDIV:
7492   case ISD::UREM:
7493   case ISD::BSWAP:
7494   case ISD::CTPOP:
7495     return lowerToScalableOp(Op, DAG);
7496   case ISD::SHL:
7497   case ISD::SRA:
7498   case ISD::SRL:
7499     if (Op.getSimpleValueType().isFixedLengthVector())
7500       return lowerToScalableOp(Op, DAG);
7501     // This can be called for an i32 shift amount that needs to be promoted.
7502     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7503            "Unexpected custom legalisation");
7504     return SDValue();
7505   case ISD::FABS:
7506   case ISD::FNEG:
7507     if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7508       return lowerFABSorFNEG(Op, DAG, Subtarget);
7509     [[fallthrough]];
7510   case ISD::FADD:
7511   case ISD::FSUB:
7512   case ISD::FMUL:
7513   case ISD::FDIV:
7514   case ISD::FSQRT:
7515   case ISD::FMA:
7516   case ISD::FMINNUM:
7517   case ISD::FMAXNUM:
7518     if (isPromotedOpNeedingSplit(Op, Subtarget))
7519       return SplitVectorOp(Op, DAG);
7520     [[fallthrough]];
7521   case ISD::AVGFLOORS:
7522   case ISD::AVGFLOORU:
7523   case ISD::AVGCEILS:
7524   case ISD::AVGCEILU:
7525   case ISD::SMIN:
7526   case ISD::SMAX:
7527   case ISD::UMIN:
7528   case ISD::UMAX:
7529   case ISD::UADDSAT:
7530   case ISD::USUBSAT:
7531   case ISD::SADDSAT:
7532   case ISD::SSUBSAT:
7533     return lowerToScalableOp(Op, DAG);
7534   case ISD::ABDS:
7535   case ISD::ABDU: {
7536     SDLoc dl(Op);
7537     EVT VT = Op->getValueType(0);
7538     SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7539     SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7540     bool IsSigned = Op->getOpcode() == ISD::ABDS;
7541 
7542     // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7543     // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7544     unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7545     unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7546     SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7547     SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7548     return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7549   }
7550   case ISD::ABS:
7551   case ISD::VP_ABS:
7552     return lowerABS(Op, DAG);
7553   case ISD::CTLZ:
7554   case ISD::CTLZ_ZERO_UNDEF:
7555   case ISD::CTTZ:
7556   case ISD::CTTZ_ZERO_UNDEF:
7557     if (Subtarget.hasStdExtZvbb())
7558       return lowerToScalableOp(Op, DAG);
7559     assert(Op.getOpcode() != ISD::CTTZ);
7560     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7561   case ISD::VSELECT:
7562     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7563   case ISD::FCOPYSIGN:
7564     if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7565       return lowerFCOPYSIGN(Op, DAG, Subtarget);
7566     if (isPromotedOpNeedingSplit(Op, Subtarget))
7567       return SplitVectorOp(Op, DAG);
7568     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7569   case ISD::STRICT_FADD:
7570   case ISD::STRICT_FSUB:
7571   case ISD::STRICT_FMUL:
7572   case ISD::STRICT_FDIV:
7573   case ISD::STRICT_FSQRT:
7574   case ISD::STRICT_FMA:
7575     if (isPromotedOpNeedingSplit(Op, Subtarget))
7576       return SplitStrictFPVectorOp(Op, DAG);
7577     return lowerToScalableOp(Op, DAG);
7578   case ISD::STRICT_FSETCC:
7579   case ISD::STRICT_FSETCCS:
7580     return lowerVectorStrictFSetcc(Op, DAG);
7581   case ISD::STRICT_FCEIL:
7582   case ISD::STRICT_FRINT:
7583   case ISD::STRICT_FFLOOR:
7584   case ISD::STRICT_FTRUNC:
7585   case ISD::STRICT_FNEARBYINT:
7586   case ISD::STRICT_FROUND:
7587   case ISD::STRICT_FROUNDEVEN:
7588     return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7589   case ISD::MGATHER:
7590   case ISD::VP_GATHER:
7591     return lowerMaskedGather(Op, DAG);
7592   case ISD::MSCATTER:
7593   case ISD::VP_SCATTER:
7594     return lowerMaskedScatter(Op, DAG);
7595   case ISD::GET_ROUNDING:
7596     return lowerGET_ROUNDING(Op, DAG);
7597   case ISD::SET_ROUNDING:
7598     return lowerSET_ROUNDING(Op, DAG);
7599   case ISD::EH_DWARF_CFA:
7600     return lowerEH_DWARF_CFA(Op, DAG);
7601   case ISD::VP_MERGE:
7602     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7603       return lowerVPMergeMask(Op, DAG);
7604     [[fallthrough]];
7605   case ISD::VP_SELECT:
7606   case ISD::VP_ADD:
7607   case ISD::VP_SUB:
7608   case ISD::VP_MUL:
7609   case ISD::VP_SDIV:
7610   case ISD::VP_UDIV:
7611   case ISD::VP_SREM:
7612   case ISD::VP_UREM:
7613   case ISD::VP_UADDSAT:
7614   case ISD::VP_USUBSAT:
7615   case ISD::VP_SADDSAT:
7616   case ISD::VP_SSUBSAT:
7617   case ISD::VP_LRINT:
7618   case ISD::VP_LLRINT:
7619     return lowerVPOp(Op, DAG);
7620   case ISD::VP_AND:
7621   case ISD::VP_OR:
7622   case ISD::VP_XOR:
7623     return lowerLogicVPOp(Op, DAG);
7624   case ISD::VP_FADD:
7625   case ISD::VP_FSUB:
7626   case ISD::VP_FMUL:
7627   case ISD::VP_FDIV:
7628   case ISD::VP_FNEG:
7629   case ISD::VP_FABS:
7630   case ISD::VP_SQRT:
7631   case ISD::VP_FMA:
7632   case ISD::VP_FMINNUM:
7633   case ISD::VP_FMAXNUM:
7634   case ISD::VP_FCOPYSIGN:
7635     if (isPromotedOpNeedingSplit(Op, Subtarget))
7636       return SplitVPOp(Op, DAG);
7637     [[fallthrough]];
7638   case ISD::VP_SRA:
7639   case ISD::VP_SRL:
7640   case ISD::VP_SHL:
7641     return lowerVPOp(Op, DAG);
7642   case ISD::VP_IS_FPCLASS:
7643     return LowerIS_FPCLASS(Op, DAG);
7644   case ISD::VP_SIGN_EXTEND:
7645   case ISD::VP_ZERO_EXTEND:
7646     if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7647       return lowerVPExtMaskOp(Op, DAG);
7648     return lowerVPOp(Op, DAG);
7649   case ISD::VP_TRUNCATE:
7650     return lowerVectorTruncLike(Op, DAG);
7651   case ISD::VP_FP_EXTEND:
7652   case ISD::VP_FP_ROUND:
7653     return lowerVectorFPExtendOrRoundLike(Op, DAG);
7654   case ISD::VP_SINT_TO_FP:
7655   case ISD::VP_UINT_TO_FP:
7656     if (Op.getValueType().isVector() &&
7657         ((Op.getValueType().getScalarType() == MVT::f16 &&
7658           (Subtarget.hasVInstructionsF16Minimal() &&
7659            !Subtarget.hasVInstructionsF16())) ||
7660          Op.getValueType().getScalarType() == MVT::bf16)) {
7661       if (isPromotedOpNeedingSplit(Op, Subtarget))
7662         return SplitVectorOp(Op, DAG);
7663       // int -> f32
7664       SDLoc DL(Op);
7665       MVT NVT =
7666           MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7667       auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7668       // f32 -> [b]f16
7669       return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7670                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7671     }
7672     [[fallthrough]];
7673   case ISD::VP_FP_TO_SINT:
7674   case ISD::VP_FP_TO_UINT:
7675     if (SDValue Op1 = Op.getOperand(0);
7676         Op1.getValueType().isVector() &&
7677         ((Op1.getValueType().getScalarType() == MVT::f16 &&
7678           (Subtarget.hasVInstructionsF16Minimal() &&
7679            !Subtarget.hasVInstructionsF16())) ||
7680          Op1.getValueType().getScalarType() == MVT::bf16)) {
7681       if (isPromotedOpNeedingSplit(Op1, Subtarget))
7682         return SplitVectorOp(Op, DAG);
7683       // [b]f16 -> f32
7684       SDLoc DL(Op);
7685       MVT NVT = MVT::getVectorVT(MVT::f32,
7686                                  Op1.getValueType().getVectorElementCount());
7687       SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7688       // f32 -> int
7689       return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7690                          {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7691     }
7692     return lowerVPFPIntConvOp(Op, DAG);
7693   case ISD::VP_SETCC:
7694     if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7695       return SplitVPOp(Op, DAG);
7696     if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7697       return lowerVPSetCCMaskOp(Op, DAG);
7698     [[fallthrough]];
7699   case ISD::VP_SMIN:
7700   case ISD::VP_SMAX:
7701   case ISD::VP_UMIN:
7702   case ISD::VP_UMAX:
7703   case ISD::VP_BITREVERSE:
7704   case ISD::VP_BSWAP:
7705     return lowerVPOp(Op, DAG);
7706   case ISD::VP_CTLZ:
7707   case ISD::VP_CTLZ_ZERO_UNDEF:
7708     if (Subtarget.hasStdExtZvbb())
7709       return lowerVPOp(Op, DAG);
7710     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7711   case ISD::VP_CTTZ:
7712   case ISD::VP_CTTZ_ZERO_UNDEF:
7713     if (Subtarget.hasStdExtZvbb())
7714       return lowerVPOp(Op, DAG);
7715     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7716   case ISD::VP_CTPOP:
7717     return lowerVPOp(Op, DAG);
7718   case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7719     return lowerVPStridedLoad(Op, DAG);
7720   case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7721     return lowerVPStridedStore(Op, DAG);
7722   case ISD::VP_FCEIL:
7723   case ISD::VP_FFLOOR:
7724   case ISD::VP_FRINT:
7725   case ISD::VP_FNEARBYINT:
7726   case ISD::VP_FROUND:
7727   case ISD::VP_FROUNDEVEN:
7728   case ISD::VP_FROUNDTOZERO:
7729     if (isPromotedOpNeedingSplit(Op, Subtarget))
7730       return SplitVPOp(Op, DAG);
7731     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7732   case ISD::VP_FMAXIMUM:
7733   case ISD::VP_FMINIMUM:
7734     if (isPromotedOpNeedingSplit(Op, Subtarget))
7735       return SplitVPOp(Op, DAG);
7736     return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7737   case ISD::EXPERIMENTAL_VP_SPLICE:
7738     return lowerVPSpliceExperimental(Op, DAG);
7739   case ISD::EXPERIMENTAL_VP_REVERSE:
7740     return lowerVPReverseExperimental(Op, DAG);
7741   case ISD::EXPERIMENTAL_VP_SPLAT:
7742     return lowerVPSplatExperimental(Op, DAG);
7743   case ISD::CLEAR_CACHE: {
7744     assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7745            "llvm.clear_cache only needs custom lower on Linux targets");
7746     SDLoc DL(Op);
7747     SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7748     return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7749                            Op.getOperand(2), Flags, DL);
7750   }
7751   case ISD::DYNAMIC_STACKALLOC:
7752     return lowerDYNAMIC_STACKALLOC(Op, DAG);
7753   case ISD::INIT_TRAMPOLINE:
7754     return lowerINIT_TRAMPOLINE(Op, DAG);
7755   case ISD::ADJUST_TRAMPOLINE:
7756     return lowerADJUST_TRAMPOLINE(Op, DAG);
7757   }
7758 }
7759 
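     // Emit a libcall that flushes the instruction cache over the range
     // [Start, End) with the given Flags. The call produces no value, so only
     // the output chain of the call is returned.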
7760 SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7761                                              SDValue Start, SDValue End,
7762                                              SDValue Flags, SDLoc DL) const {
7763   MakeLibCallOptions CallOptions;
7764   std::pair<SDValue, SDValue> CallResult =
7765       makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7766                   {Start, End, Flags}, CallOptions, DL, InChain);
7767 
7768   // This function returns void so only the out chain matters.
7769   return CallResult.second;
7770 }
7771 
7772 SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7773                                                   SelectionDAG &DAG) const {
7774   if (!Subtarget.is64Bit())
7775     llvm::report_fatal_error("Trampolines only implemented for RV64");
7776 
7777   // Create an MCCodeEmitter to encode instructions.
7778   TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7779   assert(TLO);
7780   MCContext &MCCtx = TLO->getContext();
7781 
7782   std::unique_ptr<MCCodeEmitter> CodeEmitter(
7783       createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7784 
7785   SDValue Root = Op.getOperand(0);
7786   SDValue Trmp = Op.getOperand(1); // trampoline
7787   SDLoc dl(Op);
7788 
7789   const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7790 
7791   // We store in the trampoline buffer the following instructions and data.
7792   // Offset:
7793   //      0: auipc   t2, 0
7794   //      4: ld      t0, 24(t2)
7795   //      8: ld      t2, 16(t2)
7796   //     12: jalr    t0
7797   //     16: <StaticChainOffset>
7798   //     24: <FunctionAddressOffset>
7799   //     32:
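     // That is, 16 bytes of encoded instructions followed by two 8-byte data
     // slots, for a 32-byte trampoline.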
7800 
7801   constexpr unsigned StaticChainOffset = 16;
7802   constexpr unsigned FunctionAddressOffset = 24;
7803 
7804   const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7805   assert(STI);
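       // Encodes a single MCInst and returns its 32-bit little-endian
       // instruction word.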
7806   auto GetEncoding = [&](const MCInst &MC) {
7807     SmallVector<char, 4> CB;
7808     SmallVector<MCFixup> Fixups;
7809     CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7810     uint32_t Encoding = support::endian::read32le(CB.data());
7811     return Encoding;
7812   };
7813 
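       // Four chains for the instruction-word stores plus two for the static
       // chain and function-address stores below.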
7814   SDValue OutChains[6];
7815 
7816   uint32_t Encodings[] = {
7817       // auipc t2, 0
7818       // Loads the current PC into t2.
7819       GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7820       // ld t0, 24(t2)
7821       // Loads the function address into t0. Note that the offsets are
7822       // PC-relative to the first instruction of the trampoline.
7823       GetEncoding(
7824           MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7825               FunctionAddressOffset)),
7826       // ld t2, 16(t2)
7827       // Load the value of the static chain.
7828       GetEncoding(
7829           MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7830               StaticChainOffset)),
7831       // jalr t0
7832       // Jump to the function.
7833       GetEncoding(MCInstBuilder(RISCV::JALR)
7834                       .addReg(RISCV::X0)
7835                       .addReg(RISCV::X5)
7836                       .addImm(0))};
7837 
7838   // Store encoded instructions.
7839   for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7840     SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7841                                          DAG.getConstant(Idx * 4, dl, MVT::i64))
7842                            : Trmp;
7843     OutChains[Idx] = DAG.getTruncStore(
7844         Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7845         MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7846   }
7847 
7848   // Now store the variable part of the trampoline.
7849   SDValue FunctionAddress = Op.getOperand(2);
7850   SDValue StaticChain = Op.getOperand(3);
7851 
7852   // Store the given static chain and function pointer in the trampoline buffer.
7853   struct OffsetValuePair {
7854     const unsigned Offset;
7855     const SDValue Value;
7856     SDValue Addr = SDValue(); // Used to cache the address.
7857   } OffsetValues[] = {
7858       {StaticChainOffset, StaticChain},
7859       {FunctionAddressOffset, FunctionAddress},
7860   };
7861   for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7862     SDValue Addr =
7863         DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7864                     DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7865     OffsetValue.Addr = Addr;
7866     OutChains[Idx + 4] =
7867         DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7868                      MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7869   }
7870 
7871   SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7872 
7873   // The end of the trampoline's instructions is the same as the static
7874   // chain address that we computed earlier.
7875   SDValue EndOfTrmp = OffsetValues[0].Addr;
7876 
7877   // Call clear cache on the trampoline instructions.
7878   SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7879                               Trmp, EndOfTrmp);
7880 
7881   return Chain;
7882 }
7883 
7884 SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7885                                                     SelectionDAG &DAG) const {
7886   if (!Subtarget.is64Bit())
7887     llvm::report_fatal_error("Trampolines only implemented for RV64");
7888 
7889   return Op.getOperand(0);
7890 }
7891 
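     // Helpers that create the corresponding target node (TargetGlobalAddress,
     // TargetBlockAddress, TargetConstantPool or TargetJumpTable) with the
     // given relocation flags.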
7892 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7893                              SelectionDAG &DAG, unsigned Flags) {
7894   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7895 }
7896 
7897 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7898                              SelectionDAG &DAG, unsigned Flags) {
7899   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7900                                    Flags);
7901 }
7902 
7903 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7904                              SelectionDAG &DAG, unsigned Flags) {
7905   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7906                                    N->getOffset(), Flags);
7907 }
7908 
7909 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7910                              SelectionDAG &DAG, unsigned Flags) {
7911   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7912 }
7913 
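     // For the large code model, the symbol's full address is placed in a
     // constant pool entry and loaded with a PC-relative access to that entry.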
7914 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
7915                                      EVT Ty, SelectionDAG &DAG) {
7916   RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7917   SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7918   SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7919   return DAG.getLoad(
7920       Ty, DL, DAG.getEntryNode(), LC,
7921       MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7922 }
7923 
7924 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
7925                                       EVT Ty, SelectionDAG &DAG) {
7926   RISCVConstantPoolValue *CPV =
7927       RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7928   SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7929   SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7930   return DAG.getLoad(
7931       Ty, DL, DAG.getEntryNode(), LC,
7932       MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7933 }
7934 
7935 template <class NodeTy>
7936 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7937                                      bool IsLocal, bool IsExternWeak) const {
7938   SDLoc DL(N);
7939   EVT Ty = getPointerTy(DAG.getDataLayout());
7940 
7941   // When HWASAN is used and tagging of global variables is enabled, they
7942   // should be accessed via the GOT, since the tagged address of a global
7943   // is incompatible with existing code models. This also applies to non-pic
7944   // mode.
7945   if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7946     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7947     if (IsLocal && !Subtarget.allowTaggedGlobals())
7948       // Use PC-relative addressing to access the symbol. This generates the
7949       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7950       // %pcrel_lo(auipc)).
7951       return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7952 
7953     // Use PC-relative addressing to access the GOT for this symbol, then load
7954     // the address from the GOT. This generates the pattern (PseudoLGA sym),
7955     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7956     SDValue Load =
7957         SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7958     MachineFunction &MF = DAG.getMachineFunction();
7959     MachineMemOperand *MemOp = MF.getMachineMemOperand(
7960         MachinePointerInfo::getGOT(MF),
7961         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7962             MachineMemOperand::MOInvariant,
7963         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7964     DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7965     return Load;
7966   }
7967 
7968   switch (getTargetMachine().getCodeModel()) {
7969   default:
7970     report_fatal_error("Unsupported code model for lowering");
7971   case CodeModel::Small: {
7972     // Generate a sequence for accessing addresses within the first 2 GiB of
7973     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7974     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7975     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7976     SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7977     return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7978   }
7979   case CodeModel::Medium: {
7980     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7981     if (IsExternWeak) {
7982       // An extern weak symbol may be undefined, i.e. have value 0, which may
7983       // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7984       // symbol. This generates the pattern (PseudoLGA sym), which expands to
7985       // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7986       SDValue Load =
7987           SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7988       MachineFunction &MF = DAG.getMachineFunction();
7989       MachineMemOperand *MemOp = MF.getMachineMemOperand(
7990           MachinePointerInfo::getGOT(MF),
7991           MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7992               MachineMemOperand::MOInvariant,
7993           LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7994       DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7995       return Load;
7996     }
7997 
7998     // Generate a sequence for accessing addresses within any 2GiB range within
7999     // the address space. This generates the pattern (PseudoLLA sym), which
8000     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8001     return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8002   }
8003   case CodeModel::Large: {
8004     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8005       return getLargeGlobalAddress(G, DL, Ty, DAG);
8006 
8007     // Use PC-relative addressing for other node types.
8008     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8009     return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8010   }
8011   }
8012 }
8013 
8014 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8015                                                 SelectionDAG &DAG) const {
8016   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8017   assert(N->getOffset() == 0 && "unexpected offset in global node");
8018   const GlobalValue *GV = N->getGlobal();
8019   return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8020 }
8021 
8022 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8023                                                SelectionDAG &DAG) const {
8024   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8025 
8026   return getAddr(N, DAG);
8027 }
8028 
8029 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8030                                                SelectionDAG &DAG) const {
8031   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8032 
8033   return getAddr(N, DAG);
8034 }
8035 
8036 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8037                                             SelectionDAG &DAG) const {
8038   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8039 
8040   return getAddr(N, DAG);
8041 }
8042 
8043 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8044                                               SelectionDAG &DAG,
8045                                               bool UseGOT) const {
8046   SDLoc DL(N);
8047   EVT Ty = getPointerTy(DAG.getDataLayout());
8048   const GlobalValue *GV = N->getGlobal();
8049   MVT XLenVT = Subtarget.getXLenVT();
8050 
8051   if (UseGOT) {
8052     // Use PC-relative addressing to access the GOT for this TLS symbol, then
8053     // load the address from the GOT and add the thread pointer. This generates
8054     // the pattern (PseudoLA_TLS_IE sym), which expands to
8055     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8056     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8057     SDValue Load =
8058         SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8059     MachineFunction &MF = DAG.getMachineFunction();
8060     MachineMemOperand *MemOp = MF.getMachineMemOperand(
8061         MachinePointerInfo::getGOT(MF),
8062         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8063             MachineMemOperand::MOInvariant,
8064         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8065     DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8066 
8067     // Add the thread pointer.
8068     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8069     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8070   }
8071 
8072   // Generate a sequence for accessing the address relative to the thread
8073   // pointer, with the appropriate adjustment for the thread pointer offset.
8074   // This generates the pattern
8075   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8076   SDValue AddrHi =
8077       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8078   SDValue AddrAdd =
8079       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8080   SDValue AddrLo =
8081       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8082 
8083   SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8084   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8085   SDValue MNAdd =
8086       DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8087   return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8088 }
8089 
8090 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8091                                                SelectionDAG &DAG) const {
8092   SDLoc DL(N);
8093   EVT Ty = getPointerTy(DAG.getDataLayout());
8094   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8095   const GlobalValue *GV = N->getGlobal();
8096 
8097   // Use a PC-relative addressing mode to access the global dynamic GOT address.
8098   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8099   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8100   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8101   SDValue Load =
8102       SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8103 
8104   // Prepare argument list to generate call.
8105   ArgListTy Args;
8106   ArgListEntry Entry;
8107   Entry.Node = Load;
8108   Entry.Ty = CallTy;
8109   Args.push_back(Entry);
8110 
8111   // Setup call to __tls_get_addr.
8112   TargetLowering::CallLoweringInfo CLI(DAG);
8113   CLI.setDebugLoc(DL)
8114       .setChain(DAG.getEntryNode())
8115       .setLibCallee(CallingConv::C, CallTy,
8116                     DAG.getExternalSymbol("__tls_get_addr", Ty),
8117                     std::move(Args));
8118 
8119   return LowerCallTo(CLI).first;
8120 }
8121 
8122 SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8123                                             SelectionDAG &DAG) const {
8124   SDLoc DL(N);
8125   EVT Ty = getPointerTy(DAG.getDataLayout());
8126   const GlobalValue *GV = N->getGlobal();
8127 
8128   // Use a PC-relative addressing mode to access the global dynamic GOT address.
8129   // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8130   //
8131   // auipc tX, %tlsdesc_hi(symbol)         // R_RISCV_TLSDESC_HI20(symbol)
8132   // lw    tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8133   // addi  a0, tX, %tlsdesc_add_lo(label)  // R_RISCV_TLSDESC_ADD_LO12(label)
8134   // jalr  t0, tY                          // R_RISCV_TLSDESC_CALL(label)
8135   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8136   return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8137 }
8138 
8139 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8140                                                    SelectionDAG &DAG) const {
8141   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8142   assert(N->getOffset() == 0 && "unexpected offset in global node");
8143 
8144   if (DAG.getTarget().useEmulatedTLS())
8145     return LowerToTLSEmulatedModel(N, DAG);
8146 
8147   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8148 
8149   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8150       CallingConv::GHC)
8151     report_fatal_error("In GHC calling convention TLS is not supported");
8152 
8153   SDValue Addr;
8154   switch (Model) {
8155   case TLSModel::LocalExec:
8156     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8157     break;
8158   case TLSModel::InitialExec:
8159     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8160     break;
8161   case TLSModel::LocalDynamic:
8162   case TLSModel::GeneralDynamic:
8163     Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8164                                         : getDynamicTLSAddr(N, DAG);
8165     break;
8166   }
8167 
8168   return Addr;
8169 }
8170 
8171 // Return true if Val is equal to (setcc LHS, RHS, CC).
8172 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
8173 // Otherwise, return std::nullopt.
8174 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8175                                       ISD::CondCode CC, SDValue Val) {
8176   assert(Val->getOpcode() == ISD::SETCC);
8177   SDValue LHS2 = Val.getOperand(0);
8178   SDValue RHS2 = Val.getOperand(1);
8179   ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8180 
8181   if (LHS == LHS2 && RHS == RHS2) {
8182     if (CC == CC2)
8183       return true;
8184     if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8185       return false;
8186   } else if (LHS == RHS2 && RHS == LHS2) {
8187     CC2 = ISD::getSetCCSwappedOperands(CC2);
8188     if (CC == CC2)
8189       return true;
8190     if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8191       return false;
8192   }
8193 
8194   return std::nullopt;
8195 }
8196 
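     // Try to fold a select into simple arithmetic/bitwise ops so that no
     // conditional move or branch is needed, e.g. (select c, -1, y) -> (or
     // (neg c), y).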
8197 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8198                                     const RISCVSubtarget &Subtarget) {
8199   SDValue CondV = N->getOperand(0);
8200   SDValue TrueV = N->getOperand(1);
8201   SDValue FalseV = N->getOperand(2);
8202   MVT VT = N->getSimpleValueType(0);
8203   SDLoc DL(N);
8204 
8205   if (!Subtarget.hasConditionalMoveFusion()) {
8206     // (select c, -1, y) -> -c | y
8207     if (isAllOnesConstant(TrueV)) {
8208       SDValue Neg = DAG.getNegative(CondV, DL, VT);
8209       return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8210     }
8211     // (select c, y, -1) -> (c-1) | y
8212     if (isAllOnesConstant(FalseV)) {
8213       SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8214                                 DAG.getAllOnesConstant(DL, VT));
8215       return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8216     }
8217 
8218     // (select c, 0, y) -> (c-1) & y
8219     if (isNullConstant(TrueV)) {
8220       SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8221                                 DAG.getAllOnesConstant(DL, VT));
8222       return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8223     }
8224     // (select c, y, 0) -> -c & y
8225     if (isNullConstant(FalseV)) {
8226       SDValue Neg = DAG.getNegative(CondV, DL, VT);
8227       return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8228     }
8229   }
8230 
8231   // select c, ~x, x --> xor -c, x
8232   if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8233     const APInt &TrueVal = TrueV->getAsAPIntVal();
8234     const APInt &FalseVal = FalseV->getAsAPIntVal();
8235     if (~TrueVal == FalseVal) {
8236       SDValue Neg = DAG.getNegative(CondV, DL, VT);
8237       return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8238     }
8239   }
8240 
8241   // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8242   // when both truev and falsev are also setcc.
8243   if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8244       FalseV.getOpcode() == ISD::SETCC) {
8245     SDValue LHS = CondV.getOperand(0);
8246     SDValue RHS = CondV.getOperand(1);
8247     ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8248 
8249     // (select x, x, y) -> x | y
8250     // (select !x, x, y) -> x & y
8251     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8252       return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8253                          DAG.getFreeze(FalseV));
8254     }
8255     // (select x, y, x) -> x & y
8256     // (select !x, y, x) -> x | y
8257     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8258       return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8259                          DAG.getFreeze(TrueV), FalseV);
8260     }
8261   }
8262 
8263   return SDValue();
8264 }
8265 
8266 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8267 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8268 // For now we only consider the transformation profitable if `binOp(c0, c1)`
8269 // ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
8270 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8271 // than `c0`?
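     // For example, (add (select cond, x, -1), 1) is rewritten to
     // (select cond, (add x, 1), 0), where the select of 0 can then be lowered
     // without a branch.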
8272 static SDValue
8273 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8274                                 const RISCVSubtarget &Subtarget) {
8275   if (Subtarget.hasShortForwardBranchOpt())
8276     return SDValue();
8277 
8278   unsigned SelOpNo = 0;
8279   SDValue Sel = BO->getOperand(0);
8280   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8281     SelOpNo = 1;
8282     Sel = BO->getOperand(1);
8283   }
8284 
8285   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8286     return SDValue();
8287 
8288   unsigned ConstSelOpNo = 1;
8289   unsigned OtherSelOpNo = 2;
8290   if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8291     ConstSelOpNo = 2;
8292     OtherSelOpNo = 1;
8293   }
8294   SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8295   ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8296   if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8297     return SDValue();
8298 
8299   SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8300   ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8301   if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8302     return SDValue();
8303 
8304   SDLoc DL(Sel);
8305   EVT VT = BO->getValueType(0);
8306 
8307   SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8308   if (SelOpNo == 1)
8309     std::swap(NewConstOps[0], NewConstOps[1]);
8310 
8311   SDValue NewConstOp =
8312       DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8313   if (!NewConstOp)
8314     return SDValue();
8315 
8316   const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8317   if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8318     return SDValue();
8319 
8320   SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8321   SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8322   if (SelOpNo == 1)
8323     std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8324   SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8325 
8326   SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8327   SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8328   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8329 }
8330 
8331 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8332   SDValue CondV = Op.getOperand(0);
8333   SDValue TrueV = Op.getOperand(1);
8334   SDValue FalseV = Op.getOperand(2);
8335   SDLoc DL(Op);
8336   MVT VT = Op.getSimpleValueType();
8337   MVT XLenVT = Subtarget.getXLenVT();
8338 
8339   // Lower vector SELECTs to VSELECTs by splatting the condition.
8340   if (VT.isVector()) {
8341     MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8342     SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8343     return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8344   }
8345 
8346   // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8347   // nodes to implement the SELECT. Performing the lowering here allows for
8348   // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8349   // sequence or RISCVISD::SELECT_CC node (branch-based select).
8350   if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8351       VT.isScalarInteger()) {
8352     // (select c, t, 0) -> (czero_eqz t, c)
8353     if (isNullConstant(FalseV))
8354       return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8355     // (select c, 0, f) -> (czero_nez f, c)
8356     if (isNullConstant(TrueV))
8357       return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8358 
8359     // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8360     if (TrueV.getOpcode() == ISD::AND &&
8361         (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8362       return DAG.getNode(
8363           ISD::OR, DL, VT, TrueV,
8364           DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8365     // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8366     if (FalseV.getOpcode() == ISD::AND &&
8367         (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8368       return DAG.getNode(
8369           ISD::OR, DL, VT, FalseV,
8370           DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8371 
8372     // Try some other optimizations before falling back to generic lowering.
8373     if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8374       return V;
8375 
8376     // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8377     // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
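         // e.g. with c1 = 7 and c2 = 3, the first form gives
         // (add (czero_nez -4, c), 7).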
8378     if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8379       const APInt &TrueVal = TrueV->getAsAPIntVal();
8380       const APInt &FalseVal = FalseV->getAsAPIntVal();
8381       const int TrueValCost = RISCVMatInt::getIntMatCost(
8382           TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8383       const int FalseValCost = RISCVMatInt::getIntMatCost(
8384           FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8385       bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8386       SDValue LHSVal = DAG.getConstant(
8387           IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8388       SDValue RHSVal =
8389           DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8390       SDValue CMOV =
8391           DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8392                       DL, VT, LHSVal, CondV);
8393       return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8394     }
8395 
8396     // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8397     // Unless we have the short forward branch optimization.
8398     if (!Subtarget.hasConditionalMoveFusion())
8399       return DAG.getNode(
8400           ISD::OR, DL, VT,
8401           DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8402           DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8403   }
8404 
8405   if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8406     return V;
8407 
8408   if (Op.hasOneUse()) {
8409     unsigned UseOpc = Op->user_begin()->getOpcode();
8410     if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8411       SDNode *BinOp = *Op->user_begin();
8412       if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8413                                                            DAG, Subtarget)) {
8414         DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8415         // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8416         // may return a constant node and cause a crash in lowerSELECT.
8417         if (NewSel.getOpcode() == ISD::SELECT)
8418           return lowerSELECT(NewSel, DAG);
8419         return NewSel;
8420       }
8421     }
8422   }
8423 
8424   // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8425   // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8426   const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8427   const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8428   if (FPTV && FPFV) {
8429     if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8430       return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8431     if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8432       SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8433                                 DAG.getConstant(1, DL, XLenVT));
8434       return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8435     }
8436   }
8437 
8438   // If the condition is not an integer SETCC which operates on XLenVT, we need
8439   // to emit a RISCVISD::SELECT_CC comparing the condition to zero, i.e.:
8440   // (select condv, truev, falsev)
8441   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8442   if (CondV.getOpcode() != ISD::SETCC ||
8443       CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8444     SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8445     SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8446 
8447     SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8448 
8449     return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8450   }
8451 
8452   // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8453   // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8454   // advantage of the integer compare+branch instructions. i.e.:
8455   // (select (setcc lhs, rhs, cc), truev, falsev)
8456   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8457   SDValue LHS = CondV.getOperand(0);
8458   SDValue RHS = CondV.getOperand(1);
8459   ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8460 
8461   // Special case for a select of 2 constants that have a difference of 1.
8462   // Normally this is done by DAGCombine, but if the select is introduced by
8463   // type legalization or op legalization, we miss it. Restricting to the
8464   // SETLT case for now because that is what signed saturating add/sub need.
8465   // FIXME: We don't need the condition to be SETLT or even a SETCC,
8466   // but we would probably want to swap the true/false values if the condition
8467   // is SETGE/SETLE to avoid an XORI.
8468   if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8469       CCVal == ISD::SETLT) {
8470     const APInt &TrueVal = TrueV->getAsAPIntVal();
8471     const APInt &FalseVal = FalseV->getAsAPIntVal();
8472     if (TrueVal - 1 == FalseVal)
8473       return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8474     if (TrueVal + 1 == FalseVal)
8475       return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8476   }
8477 
8478   translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8479   // 1 < x ? x : 1 -> 0 < x ? x : 1
8480   if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8481       RHS == TrueV && LHS == FalseV) {
8482     LHS = DAG.getConstant(0, DL, VT);
8483     // 0 <u x is the same as x != 0.
8484     if (CCVal == ISD::SETULT) {
8485       std::swap(LHS, RHS);
8486       CCVal = ISD::SETNE;
8487     }
8488   }
8489 
8490   // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8491   if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8492       RHS == FalseV) {
8493     RHS = DAG.getConstant(0, DL, VT);
8494   }
8495 
8496   SDValue TargetCC = DAG.getCondCode(CCVal);
8497 
8498   if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8499     // (select (setcc lhs, rhs, CC), constant, falsev)
8500     // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8501     std::swap(TrueV, FalseV);
8502     TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8503   }
8504 
8505   SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8506   return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8507 }
8508 
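     // Lower BRCOND to BR_CC, folding an integer SETCC on XLenVT operands
     // directly into the branch; otherwise compare the condition against zero.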
8509 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8510   SDValue CondV = Op.getOperand(1);
8511   SDLoc DL(Op);
8512   MVT XLenVT = Subtarget.getXLenVT();
8513 
8514   if (CondV.getOpcode() == ISD::SETCC &&
8515       CondV.getOperand(0).getValueType() == XLenVT) {
8516     SDValue LHS = CondV.getOperand(0);
8517     SDValue RHS = CondV.getOperand(1);
8518     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8519 
8520     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8521 
8522     SDValue TargetCC = DAG.getCondCode(CCVal);
8523     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8524                        LHS, RHS, TargetCC, Op.getOperand(2));
8525   }
8526 
8527   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8528                      CondV, DAG.getConstant(0, DL, XLenVT),
8529                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8530 }
8531 
8532 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8533   MachineFunction &MF = DAG.getMachineFunction();
8534   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8535 
8536   SDLoc DL(Op);
8537   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8538                                  getPointerTy(MF.getDataLayout()));
8539 
8540   // vastart just stores the address of the VarArgsFrameIndex slot into the
8541   // memory location argument.
8542   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8543   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8544                       MachinePointerInfo(SV));
8545 }
8546 
8547 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8548                                             SelectionDAG &DAG) const {
8549   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8550   MachineFunction &MF = DAG.getMachineFunction();
8551   MachineFrameInfo &MFI = MF.getFrameInfo();
8552   MFI.setFrameAddressIsTaken(true);
8553   Register FrameReg = RI.getFrameRegister(MF);
8554   int XLenInBytes = Subtarget.getXLen() / 8;
8555 
8556   EVT VT = Op.getValueType();
8557   SDLoc DL(Op);
8558   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8559   unsigned Depth = Op.getConstantOperandVal(0);
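       // Walk up Depth frames; each frame's saved frame pointer is expected at
       // an offset of -2*XLenInBytes from that frame's frame pointer.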
8560   while (Depth--) {
8561     int Offset = -(XLenInBytes * 2);
8562     SDValue Ptr = DAG.getNode(
8563         ISD::ADD, DL, VT, FrameAddr,
8564         DAG.getSignedConstant(Offset, DL, getPointerTy(DAG.getDataLayout())));
8565     FrameAddr =
8566         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8567   }
8568   return FrameAddr;
8569 }
8570 
8571 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8572                                              SelectionDAG &DAG) const {
8573   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8574   MachineFunction &MF = DAG.getMachineFunction();
8575   MachineFrameInfo &MFI = MF.getFrameInfo();
8576   MFI.setReturnAddressIsTaken(true);
8577   MVT XLenVT = Subtarget.getXLenVT();
8578   int XLenInBytes = Subtarget.getXLen() / 8;
8579 
8580   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8581     return SDValue();
8582 
8583   EVT VT = Op.getValueType();
8584   SDLoc DL(Op);
8585   unsigned Depth = Op.getConstantOperandVal(0);
8586   if (Depth) {
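         // The return address of an outer frame is expected XLenInBytes below
         // that frame's frame pointer.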
8587     int Off = -XLenInBytes;
8588     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8589     SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8590     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8591                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8592                        MachinePointerInfo());
8593   }
8594 
8595   // Return the value of the return address register, marking it an implicit
8596   // live-in.
8597   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8598   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8599 }
8600 
8601 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8602                                                  SelectionDAG &DAG) const {
8603   SDLoc DL(Op);
8604   SDValue Lo = Op.getOperand(0);
8605   SDValue Hi = Op.getOperand(1);
8606   SDValue Shamt = Op.getOperand(2);
8607   EVT VT = Lo.getValueType();
8608 
8609   // if Shamt-XLEN < 0: // Shamt < XLEN
8610   //   Lo = Lo << Shamt
8611   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8612   // else:
8613   //   Lo = 0
8614   //   Hi = Lo << (Shamt-XLEN)
8615 
8616   SDValue Zero = DAG.getConstant(0, DL, VT);
8617   SDValue One = DAG.getConstant(1, DL, VT);
8618   SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8619   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8620   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8621   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8622 
8623   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8624   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8625   SDValue ShiftRightLo =
8626       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8627   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8628   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8629   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8630 
8631   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8632 
8633   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8634   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8635 
8636   SDValue Parts[2] = {Lo, Hi};
8637   return DAG.getMergeValues(Parts, DL);
8638 }
8639 
8640 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8641                                                   bool IsSRA) const {
8642   SDLoc DL(Op);
8643   SDValue Lo = Op.getOperand(0);
8644   SDValue Hi = Op.getOperand(1);
8645   SDValue Shamt = Op.getOperand(2);
8646   EVT VT = Lo.getValueType();
8647 
8648   // SRA expansion:
8649   //   if Shamt-XLEN < 0: // Shamt < XLEN
8650   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
8651   //     Hi = Hi >>s Shamt
8652   //   else:
8653   //     Lo = Hi >>s (Shamt-XLEN);
8654   //     Hi = Hi >>s (XLEN-1)
8655   //
8656   // SRL expansion:
8657   //   if Shamt-XLEN < 0: // Shamt < XLEN
8658   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
8659   //     Hi = Hi >>u Shamt
8660   //   else:
8661   //     Lo = Hi >>u (Shamt-XLEN);
8662   //     Hi = 0;
8663 
8664   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8665 
8666   SDValue Zero = DAG.getConstant(0, DL, VT);
8667   SDValue One = DAG.getConstant(1, DL, VT);
8668   SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8669   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8670   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8671   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8672 
8673   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8674   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8675   SDValue ShiftLeftHi =
8676       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8677   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8678   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8679   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8680   SDValue HiFalse =
8681       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8682 
8683   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8684 
8685   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8686   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8687 
8688   SDValue Parts[2] = {Lo, Hi};
8689   return DAG.getMergeValues(Parts, DL);
8690 }
8691 
8692 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
8693 // legal equivalently-sized i8 type, so we can use that as a go-between.
8694 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8695                                                   SelectionDAG &DAG) const {
8696   SDLoc DL(Op);
8697   MVT VT = Op.getSimpleValueType();
8698   SDValue SplatVal = Op.getOperand(0);
8699   // All-zeros or all-ones splats are handled specially.
8700   if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8701     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8702     return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8703   }
8704   if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8705     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8706     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8707   }
8708   MVT InterVT = VT.changeVectorElementType(MVT::i8);
8709   SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8710                          DAG.getConstant(1, DL, SplatVal.getValueType()));
8711   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8712   SDValue Zero = DAG.getConstant(0, DL, InterVT);
8713   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8714 }
8715 
8716 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8717 // illegal (currently only vXi64 RV32).
8718 // FIXME: We could also catch non-constant sign-extended i32 values and lower
8719 // them to VMV_V_X_VL.
8720 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8721                                                      SelectionDAG &DAG) const {
8722   SDLoc DL(Op);
8723   MVT VecVT = Op.getSimpleValueType();
8724   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8725          "Unexpected SPLAT_VECTOR_PARTS lowering");
8726 
8727   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8728   SDValue Lo = Op.getOperand(0);
8729   SDValue Hi = Op.getOperand(1);
8730 
8731   MVT ContainerVT = VecVT;
8732   if (VecVT.isFixedLengthVector())
8733     ContainerVT = getContainerForFixedLengthVector(VecVT);
8734 
8735   auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8736 
8737   SDValue Res =
8738       splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8739 
8740   if (VecVT.isFixedLengthVector())
8741     Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8742 
8743   return Res;
8744 }
8745 
8746 // Custom-lower extensions from mask vectors by using a vselect either with 1
8747 // for zero/any-extension or -1 for sign-extension:
8748 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8749 // Note that any-extension is lowered identically to zero-extension.
8750 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8751                                                 int64_t ExtTrueVal) const {
8752   SDLoc DL(Op);
8753   MVT VecVT = Op.getSimpleValueType();
8754   SDValue Src = Op.getOperand(0);
8755   // Only custom-lower extensions from mask types
8756   assert(Src.getValueType().isVector() &&
8757          Src.getValueType().getVectorElementType() == MVT::i1);
8758 
8759   if (VecVT.isScalableVector()) {
8760     SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8761     SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8762     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8763   }
8764 
8765   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8766   MVT I1ContainerVT =
8767       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8768 
8769   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8770 
8771   SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8772 
8773   MVT XLenVT = Subtarget.getXLenVT();
8774   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8775   SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8776 
8777   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8778                           DAG.getUNDEF(ContainerVT), SplatZero, VL);
8779   SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8780                              DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8781   SDValue Select =
8782       DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8783                   SplatZero, DAG.getUNDEF(ContainerVT), VL);
8784 
8785   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8786 }
8787 
8788 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8789     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8790   MVT ExtVT = Op.getSimpleValueType();
8791   // Only custom-lower extensions from fixed-length vector types.
8792   if (!ExtVT.isFixedLengthVector())
8793     return Op;
8794   MVT VT = Op.getOperand(0).getSimpleValueType();
8795   // Grab the canonical container type for the extended type. Infer the smaller
8796   // type from that to ensure the same number of vector elements, as we know
8797   // the LMUL will be sufficient to hold the smaller type.
8798   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8799   // Build the source container type manually to ensure the same number of
8800   // vector elements between source and dest.
8801   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8802                                      ContainerExtVT.getVectorElementCount());
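  // For illustration, assuming a minimum VLEN of 128: zero-extending v4i8 to
  // v4i32 picks ContainerExtVT = nxv2i32 and ContainerVT = nxv2i8, so both
  // sides share an element count and the extend is a single ExtendOpc node
  // (e.g. VZEXT_VL, typically selecting to vzext.vf4).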
8803 
8804   SDValue Op1 =
8805       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8806 
8807   SDLoc DL(Op);
8808   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8809 
8810   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8811 
8812   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8813 }
8814 
8815 // Custom-lower truncations from vectors to mask vectors by using a mask and a
8816 // setcc operation:
8817 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
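// On the container types this becomes, roughly, an AND_VL with a splat of 1
// followed by a SETCC_VL against a splat of 0, typically selecting to
// something like:
//   vand.vi  vTmp, vSrc, 1
//   vmsne.vi vMask, vTmp, 0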
8818 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8819                                                       SelectionDAG &DAG) const {
8820   bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8821   SDLoc DL(Op);
8822   EVT MaskVT = Op.getValueType();
8823   // Only expect to custom-lower truncations to mask types
8824   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8825          "Unexpected type for vector mask lowering");
8826   SDValue Src = Op.getOperand(0);
8827   MVT VecVT = Src.getSimpleValueType();
8828   SDValue Mask, VL;
8829   if (IsVPTrunc) {
8830     Mask = Op.getOperand(1);
8831     VL = Op.getOperand(2);
8832   }
8833   // If this is a fixed vector, we need to convert it to a scalable vector.
8834   MVT ContainerVT = VecVT;
8835 
8836   if (VecVT.isFixedLengthVector()) {
8837     ContainerVT = getContainerForFixedLengthVector(VecVT);
8838     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8839     if (IsVPTrunc) {
8840       MVT MaskContainerVT =
8841           getContainerForFixedLengthVector(Mask.getSimpleValueType());
8842       Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8843     }
8844   }
8845 
8846   if (!IsVPTrunc) {
8847     std::tie(Mask, VL) =
8848         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8849   }
8850 
8851   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8852   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8853 
8854   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8855                          DAG.getUNDEF(ContainerVT), SplatOne, VL);
8856   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8857                           DAG.getUNDEF(ContainerVT), SplatZero, VL);
8858 
8859   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8860   SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8861                               DAG.getUNDEF(ContainerVT), Mask, VL);
8862   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8863                       {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8864                        DAG.getUNDEF(MaskContainerVT), Mask, VL});
8865   if (MaskVT.isFixedLengthVector())
8866     Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8867   return Trunc;
8868 }
8869 
8870 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8871                                                   SelectionDAG &DAG) const {
8872   unsigned Opc = Op.getOpcode();
8873   bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8874   SDLoc DL(Op);
8875 
8876   MVT VT = Op.getSimpleValueType();
8877   // Only custom-lower vector truncates
8878   assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8879 
8880   // Truncates to mask types are handled differently
8881   if (VT.getVectorElementType() == MVT::i1)
8882     return lowerVectorMaskTruncLike(Op, DAG);
8883 
8884   // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8885   // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8886   // truncate by one power of two at a time.
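  // For example, an nxv2i64 -> nxv2i8 truncate becomes three such nodes
  // (i64->i32->i16->i8), each typically selecting to a vnsrl.wi with a zero
  // shift amount.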
8887   MVT DstEltVT = VT.getVectorElementType();
8888 
8889   SDValue Src = Op.getOperand(0);
8890   MVT SrcVT = Src.getSimpleValueType();
8891   MVT SrcEltVT = SrcVT.getVectorElementType();
8892 
8893   assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8894          isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8895          "Unexpected vector truncate lowering");
8896 
8897   MVT ContainerVT = SrcVT;
8898   SDValue Mask, VL;
8899   if (IsVPTrunc) {
8900     Mask = Op.getOperand(1);
8901     VL = Op.getOperand(2);
8902   }
8903   if (SrcVT.isFixedLengthVector()) {
8904     ContainerVT = getContainerForFixedLengthVector(SrcVT);
8905     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8906     if (IsVPTrunc) {
8907       MVT MaskVT = getMaskTypeFor(ContainerVT);
8908       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8909     }
8910   }
8911 
8912   SDValue Result = Src;
8913   if (!IsVPTrunc) {
8914     std::tie(Mask, VL) =
8915         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8916   }
8917 
8918   unsigned NewOpc;
8919   if (Opc == ISD::TRUNCATE_SSAT_S)
8920     NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8921   else if (Opc == ISD::TRUNCATE_USAT_U)
8922     NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8923   else
8924     NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8925 
8926   do {
8927     SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8928     MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8929     Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8930   } while (SrcEltVT != DstEltVT);
8931 
8932   if (SrcVT.isFixedLengthVector())
8933     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8934 
8935   return Result;
8936 }
8937 
8938 SDValue
8939 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8940                                                     SelectionDAG &DAG) const {
8941   SDLoc DL(Op);
8942   SDValue Chain = Op.getOperand(0);
8943   SDValue Src = Op.getOperand(1);
8944   MVT VT = Op.getSimpleValueType();
8945   MVT SrcVT = Src.getSimpleValueType();
8946   MVT ContainerVT = VT;
8947   if (VT.isFixedLengthVector()) {
8948     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8949     ContainerVT =
8950         SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8951     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8952   }
8953 
8954   auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8955 
8956   // RVV can only widen/truncate fp to types double/half the size of the source.
8957   if ((VT.getVectorElementType() == MVT::f64 &&
8958        (SrcVT.getVectorElementType() == MVT::f16 ||
8959         SrcVT.getVectorElementType() == MVT::bf16)) ||
8960       ((VT.getVectorElementType() == MVT::f16 ||
8961         VT.getVectorElementType() == MVT::bf16) &&
8962        SrcVT.getVectorElementType() == MVT::f64)) {
8963     // Since we round twice, the intermediate rounding must be round-to-odd.
8964     unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8965                                 ? RISCVISD::STRICT_FP_EXTEND_VL
8966                                 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8967     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8968     Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8969                       Chain, Src, Mask, VL);
8970     Chain = Src.getValue(1);
8971   }
8972 
8973   unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8974                          ? RISCVISD::STRICT_FP_EXTEND_VL
8975                          : RISCVISD::STRICT_FP_ROUND_VL;
8976   SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8977                             Chain, Src, Mask, VL);
8978   if (VT.isFixedLengthVector()) {
8979     // StrictFP operations have two result values. Their lowered result should
8980     // have the same result count.
8981     SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8982     Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8983   }
8984   return Res;
8985 }
8986 
8987 SDValue
8988 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8989                                                     SelectionDAG &DAG) const {
8990   bool IsVP =
8991       Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8992   bool IsExtend =
8993       Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8994   // RVV can only truncate fp to types half the size of the source. We
8995   // custom-lower f64->f16 (and f64->bf16) rounds via RVV's round-to-odd
8996   // float conversion instruction.
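  // Roughly, an f64->f16 round becomes a vfncvt.rod.f.f.w (f64->f32 with
  // round-to-odd) followed by a vfncvt.f.f.w (f32->f16), while an f16->f64
  // extend goes through f32 using two vfwcvt.f.f.v steps.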
8997   SDLoc DL(Op);
8998   MVT VT = Op.getSimpleValueType();
8999 
9000   assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9001 
9002   SDValue Src = Op.getOperand(0);
9003   MVT SrcVT = Src.getSimpleValueType();
9004 
9005   bool IsDirectExtend =
9006       IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9007                    (SrcVT.getVectorElementType() != MVT::f16 &&
9008                     SrcVT.getVectorElementType() != MVT::bf16));
9009   bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9010                                       VT.getVectorElementType() != MVT::bf16) ||
9011                                      SrcVT.getVectorElementType() != MVT::f64);
9012 
9013   bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9014 
9015   // Prepare any fixed-length vector operands.
9016   MVT ContainerVT = VT;
9017   SDValue Mask, VL;
9018   if (IsVP) {
9019     Mask = Op.getOperand(1);
9020     VL = Op.getOperand(2);
9021   }
9022   if (VT.isFixedLengthVector()) {
9023     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9024     ContainerVT =
9025         SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9026     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9027     if (IsVP) {
9028       MVT MaskVT = getMaskTypeFor(ContainerVT);
9029       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9030     }
9031   }
9032 
9033   if (!IsVP)
9034     std::tie(Mask, VL) =
9035         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9036 
9037   unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9038 
9039   if (IsDirectConv) {
9040     Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9041     if (VT.isFixedLengthVector())
9042       Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9043     return Src;
9044   }
9045 
9046   unsigned InterConvOpc =
9047       IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9048 
9049   MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9050   SDValue IntermediateConv =
9051       DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9052   SDValue Result =
9053       DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
9054   if (VT.isFixedLengthVector())
9055     return convertFromScalableVector(VT, Result, DAG, Subtarget);
9056   return Result;
9057 }
9058 
9059 // Given a scalable vector type and an index into it, returns the type for the
9060 // smallest subvector that the index fits in. This can be used to reduce LMUL
9061 // for operations like vslidedown.
9062 //
9063 // E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
9064 static std::optional<MVT>
9065 getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9066                       const RISCVSubtarget &Subtarget) {
9067   assert(VecVT.isScalableVector());
9068   const unsigned EltSize = VecVT.getScalarSizeInBits();
9069   const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9070   const unsigned MinVLMAX = VectorBitsMin / EltSize;
9071   MVT SmallerVT;
9072   if (MaxIdx < MinVLMAX)
9073     SmallerVT = getLMUL1VT(VecVT);
9074   else if (MaxIdx < MinVLMAX * 2)
9075     SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9076   else if (MaxIdx < MinVLMAX * 4)
9077     SmallerVT = getLMUL1VT(VecVT)
9078                     .getDoubleNumVectorElementsVT()
9079                     .getDoubleNumVectorElementsVT();
9080   if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9081     return std::nullopt;
9082   return SmallerVT;
9083 }
9084 
9085 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9086 // first position of a vector, and that vector is slid up to the insert index.
9087 // By limiting the active vector length to index+1 and merging with the
9088 // original vector (with an undisturbed tail policy for elements >= VL), we
9089 // achieve the desired result of leaving all elements untouched except the one
9090 // at VL-1, which is replaced with the desired value.
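// Roughly, for a constant non-zero index this selects to something like:
//   vsetivli    zero, idx+1, <sew>, <lmul>, tu, mu
//   vmv.s.x     vTmp, rVal           ; vfmv.s.f for FP element types
//   vslideup.vi vDest, vTmp, idx
// For index 0 the slideup is skipped and a single tail-undisturbed
// vmv.s.x/vfmv.s.f into the vector suffices.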
9091 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9092                                                     SelectionDAG &DAG) const {
9093   SDLoc DL(Op);
9094   MVT VecVT = Op.getSimpleValueType();
9095   MVT XLenVT = Subtarget.getXLenVT();
9096   SDValue Vec = Op.getOperand(0);
9097   SDValue Val = Op.getOperand(1);
9098   MVT ValVT = Val.getSimpleValueType();
9099   SDValue Idx = Op.getOperand(2);
9100 
9101   if (VecVT.getVectorElementType() == MVT::i1) {
9102     // FIXME: For now we just promote to an i8 vector and insert into that,
9103     // but this is probably not optimal.
9104     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9105     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9106     Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9107     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9108   }
9109 
9110   if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9111       ValVT == MVT::bf16) {
9112     // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9113     MVT IntVT = VecVT.changeTypeToInteger();
9114     SDValue IntInsert = DAG.getNode(
9115         ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9116         DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9117     return DAG.getBitcast(VecVT, IntInsert);
9118   }
9119 
9120   MVT ContainerVT = VecVT;
9121   // If the operand is a fixed-length vector, convert to a scalable one.
9122   if (VecVT.isFixedLengthVector()) {
9123     ContainerVT = getContainerForFixedLengthVector(VecVT);
9124     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9125   }
9126 
9127   // If we know the index we're going to insert at, we can shrink Vec so that
9128   // we're performing the scalar inserts and slideup on a smaller LMUL.
9129   MVT OrigContainerVT = ContainerVT;
9130   SDValue OrigVec = Vec;
9131   SDValue AlignedIdx;
9132   if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9133     const unsigned OrigIdx = IdxC->getZExtValue();
9134     // Do we know an upper bound on LMUL?
9135     if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9136                                               DL, DAG, Subtarget)) {
9137       ContainerVT = *ShrunkVT;
9138       AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9139     }
9140 
9141     // If we're compiling for an exact VLEN value, we can always perform
9142     // the insert in m1 as we can determine the register corresponding to
9143     // the index in the register group.
9144     const MVT M1VT = getLMUL1VT(ContainerVT);
9145     if (auto VLEN = Subtarget.getRealVLen();
9146         VLEN && ContainerVT.bitsGT(M1VT)) {
9147       EVT ElemVT = VecVT.getVectorElementType();
9148       unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9149       unsigned RemIdx = OrigIdx % ElemsPerVReg;
9150       unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9151       unsigned ExtractIdx =
9152           SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9153       AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9154       Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9155       ContainerVT = M1VT;
9156     }
9157 
9158     if (AlignedIdx)
9159       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9160                         AlignedIdx);
9161   }
9162 
9163   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9164   // Even i64-element vectors on RV32 can be lowered without scalar
9165   // legalization if the most-significant 32 bits of the value are not affected
9166   // by the sign-extension of the lower 32 bits.
9167   // TODO: We could also catch sign extensions of a 32-bit value.
9168   if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9169     const auto *CVal = cast<ConstantSDNode>(Val);
9170     if (isInt<32>(CVal->getSExtValue())) {
9171       IsLegalInsert = true;
9172       Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9173     }
9174   }
9175 
9176   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9177 
9178   SDValue ValInVec;
9179 
9180   if (IsLegalInsert) {
9181     unsigned Opc =
9182         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9183     if (isNullConstant(Idx)) {
9184       if (!VecVT.isFloatingPoint())
9185         Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9186       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9187 
9188       if (AlignedIdx)
9189         Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9190                           Vec, AlignedIdx);
9191       if (!VecVT.isFixedLengthVector())
9192         return Vec;
9193       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9194     }
9195     ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9196   } else {
9197     // On RV32, i64-element vectors must be specially handled to place the
9198     // value at element 0, by using two vslide1down instructions in sequence on
9199     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9200     // this.
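    // For illustration, with vl=2 on the i32 view:
    //   vslide1down.vx v, v, lo    ; lo lands at element 1
    //   vslide1down.vx v, v, hi    ; lo moves to element 0, hi to element 1
    // so i32 elements {0,1} hold {lo,hi}, i.e. the i64 value sits at element 0
    // once we bitcast back to the i64 container type.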
9201     SDValue ValLo, ValHi;
9202     std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9203     MVT I32ContainerVT =
9204         MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9205     SDValue I32Mask =
9206         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9207     // Limit the active VL to two.
9208     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9209     // If the Idx is 0 we can insert directly into the vector.
9210     if (isNullConstant(Idx)) {
9211       // First slide in the lo value, then the hi value above it. We use slide1down
9212       // to avoid the register group overlap constraint of vslide1up.
9213       ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9214                              Vec, Vec, ValLo, I32Mask, InsertI64VL);
9215       // If the source vector is undef don't pass along the tail elements from
9216       // the previous slide1down.
9217       SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9218       ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9219                              Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9220       // Bitcast back to the right container type.
9221       ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9222 
9223       if (AlignedIdx)
9224         ValInVec =
9225             DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9226                         ValInVec, AlignedIdx);
9227       if (!VecVT.isFixedLengthVector())
9228         return ValInVec;
9229       return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9230     }
9231 
9232     // First slide in the lo value, then the hi value above it. We use slide1down
9233     // to avoid the register group overlap constraint of vslide1up.
9234     ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9235                            DAG.getUNDEF(I32ContainerVT),
9236                            DAG.getUNDEF(I32ContainerVT), ValLo,
9237                            I32Mask, InsertI64VL);
9238     ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9239                            DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9240                            I32Mask, InsertI64VL);
9241     // Bitcast back to the right container type.
9242     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9243   }
9244 
9245   // Now that the value is in a vector, slide it into position.
9246   SDValue InsertVL =
9247       DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9248 
9249   // Use tail agnostic policy if Idx is the last index of Vec.
9250   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9251   if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9252       Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9253     Policy = RISCVII::TAIL_AGNOSTIC;
9254   SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9255                                 Idx, Mask, InsertVL, Policy);
9256 
9257   if (AlignedIdx)
9258     Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9259                           Slideup, AlignedIdx);
9260   if (!VecVT.isFixedLengthVector())
9261     return Slideup;
9262   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9263 }
9264 
9265 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9266 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9267 // types this is done using VMV_X_S to allow us to glean information about the
9268 // sign bits of the result.
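// Roughly, an integer extract at a non-zero index selects to something like:
//   vsetivli      zero, 1, <sew>, <lmul>, ta, ma
//   vslidedown.vx vTmp, vSrc, rIdx
//   vmv.x.s       rd, vTmp
// with vfmv.f.s taking the place of vmv.x.s for floating-point elements.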
9269 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9270                                                      SelectionDAG &DAG) const {
9271   SDLoc DL(Op);
9272   SDValue Idx = Op.getOperand(1);
9273   SDValue Vec = Op.getOperand(0);
9274   EVT EltVT = Op.getValueType();
9275   MVT VecVT = Vec.getSimpleValueType();
9276   MVT XLenVT = Subtarget.getXLenVT();
9277 
9278   if (VecVT.getVectorElementType() == MVT::i1) {
9279     // Use vfirst.m to extract the first bit.
9280     if (isNullConstant(Idx)) {
9281       MVT ContainerVT = VecVT;
9282       if (VecVT.isFixedLengthVector()) {
9283         ContainerVT = getContainerForFixedLengthVector(VecVT);
9284         Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9285       }
9286       auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9287       SDValue Vfirst =
9288           DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9289       SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9290                                  DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9291       return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9292     }
9293     if (VecVT.isFixedLengthVector()) {
9294       unsigned NumElts = VecVT.getVectorNumElements();
9295       if (NumElts >= 8) {
9296         MVT WideEltVT;
9297         unsigned WidenVecLen;
9298         SDValue ExtractElementIdx;
9299         SDValue ExtractBitIdx;
9300         unsigned MaxEEW = Subtarget.getELen();
9301         MVT LargestEltVT = MVT::getIntegerVT(
9302             std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9303         if (NumElts <= LargestEltVT.getSizeInBits()) {
9304           assert(isPowerOf2_32(NumElts) &&
9305                  "the number of elements should be a power of 2");
9306           WideEltVT = MVT::getIntegerVT(NumElts);
9307           WidenVecLen = 1;
9308           ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9309           ExtractBitIdx = Idx;
9310         } else {
9311           WideEltVT = LargestEltVT;
9312           WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9313           // extract element index = index / element width
9314           ExtractElementIdx = DAG.getNode(
9315               ISD::SRL, DL, XLenVT, Idx,
9316               DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9317           // mask bit index = index % element width
9318           ExtractBitIdx = DAG.getNode(
9319               ISD::AND, DL, XLenVT, Idx,
9320               DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9321         }
9322         MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9323         Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9324         SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9325                                          Vec, ExtractElementIdx);
9326         // Extract the bit from GPR.
9327         SDValue ShiftRight =
9328             DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9329         SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9330                                   DAG.getConstant(1, DL, XLenVT));
9331         return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9332       }
9333     }
9334     // Otherwise, promote to an i8 vector and extract from that.
9335     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9336     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9337     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9338   }
9339 
9340   if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9341       EltVT == MVT::bf16) {
9342     // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9343     MVT IntVT = VecVT.changeTypeToInteger();
9344     SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9345     SDValue IntExtract =
9346         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9347     return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9348   }
9349 
9350   // If this is a fixed vector, we need to convert it to a scalable vector.
9351   MVT ContainerVT = VecVT;
9352   if (VecVT.isFixedLengthVector()) {
9353     ContainerVT = getContainerForFixedLengthVector(VecVT);
9354     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9355   }
9356 
9357   // If we're compiling for an exact VLEN value and we have a known
9358   // constant index, we can always perform the extract in m1 (or
9359   // smaller) as we can determine the register corresponding to
9360   // the index in the register group.
9361   const auto VLen = Subtarget.getRealVLen();
9362   if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9363       IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9364     MVT M1VT = getLMUL1VT(ContainerVT);
9365     unsigned OrigIdx = IdxC->getZExtValue();
9366     EVT ElemVT = VecVT.getVectorElementType();
9367     unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9368     unsigned RemIdx = OrigIdx % ElemsPerVReg;
9369     unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9370     unsigned ExtractIdx =
9371       SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9372     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9373                       DAG.getVectorIdxConstant(ExtractIdx, DL));
9374     Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9375     ContainerVT = M1VT;
9376   }
9377 
9378   // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9379   // contains our index.
9380   std::optional<uint64_t> MaxIdx;
9381   if (VecVT.isFixedLengthVector())
9382     MaxIdx = VecVT.getVectorNumElements() - 1;
9383   if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9384     MaxIdx = IdxC->getZExtValue();
9385   if (MaxIdx) {
9386     if (auto SmallerVT =
9387             getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9388       ContainerVT = *SmallerVT;
9389       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9390                         DAG.getConstant(0, DL, XLenVT));
9391     }
9392   }
9393 
9394   // If after narrowing, the required slide is still greater than LMUL2,
9395   // fall back to generic expansion and go through the stack.  This is done
9396   // for a subtle reason: extracting *all* elements out of a vector is
9397   // widely expected to be linear in vector size, but because vslidedown
9398   // is linear in LMUL, performing N extracts using vslidedown becomes
9399   // O(n^2) / (VLEN/ETYPE) work.  On the surface, going through the stack
9400   // seems to have the same problem (the store is linear in LMUL), but the
9401   // generic expansion *memoizes* the store, and thus for many extracts of
9402   // the same vector we end up with one store and a bunch of loads.
9403   // TODO: We don't have the same code for insert_vector_elt because we
9404   // have BUILD_VECTOR and handle the degenerate case there.  Should we
9405   // consider adding an inverse BUILD_VECTOR node?
9406   MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9407   if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9408     return SDValue();
9409 
9410   // If the index is 0, the vector is already in the right position.
9411   if (!isNullConstant(Idx)) {
9412     // Use a VL of 1 to avoid processing more elements than we need.
9413     auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9414     Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9415                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9416   }
9417 
9418   if (!EltVT.isInteger()) {
9419     // Floating-point extracts are handled in TableGen.
9420     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9421                        DAG.getVectorIdxConstant(0, DL));
9422   }
9423 
9424   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9425   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9426 }
9427 
9428 // Some RVV intrinsics may claim that they want an integer operand to be
9429 // promoted or expanded.
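// For example (a sketch, not an exhaustive list): an i16 scalar operand of a
// .vx intrinsic on RV64 is simply extended to i64 (XLenVT) below, while an
// i64 scalar on RV32 (SEW > XLEN) is either truncated when it is known to be
// sign-extended or materialized as a splat via splatSplitI64WithVL.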
9430 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9431                                            const RISCVSubtarget &Subtarget) {
9432   assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9433           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9434           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9435          "Unexpected opcode");
9436 
9437   if (!Subtarget.hasVInstructions())
9438     return SDValue();
9439 
9440   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9441                   Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9442   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9443 
9444   SDLoc DL(Op);
9445 
9446   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9447       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9448   if (!II || !II->hasScalarOperand())
9449     return SDValue();
9450 
9451   unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9452   assert(SplatOp < Op.getNumOperands());
9453 
9454   SmallVector<SDValue, 8> Operands(Op->ops());
9455   SDValue &ScalarOp = Operands[SplatOp];
9456   MVT OpVT = ScalarOp.getSimpleValueType();
9457   MVT XLenVT = Subtarget.getXLenVT();
9458 
9459   // If this isn't a scalar, or its type is XLenVT, we're done.
9460   if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9461     return SDValue();
9462 
9463   // Simplest case is that the operand needs to be promoted to XLenVT.
9464   if (OpVT.bitsLT(XLenVT)) {
9465     // If the operand is a constant, sign extend to increase our chances
9466     // of being able to use a .vi instruction. ANY_EXTEND would become a
9467     // zero extend and the simm5 check in isel would fail.
9468     // FIXME: Should we ignore the upper bits in isel instead?
9469     unsigned ExtOpc =
9470         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9471     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9472     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9473   }
9474 
9475   // Use the previous operand to get the vXi64 VT. The result might be a mask
9476   // VT for compares. Using the previous operand assumes that the previous
9477   // operand will never have a smaller element size than a scalar operand and
9478   // that a widening operation never uses SEW=64.
9479   // NOTE: If this fails the below assert, we can probably just find the
9480   // element count from any operand or result and use it to construct the VT.
9481   assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9482   MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9483 
9484   // The more complex case is when the scalar is larger than XLenVT.
9485   assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9486          VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9487 
9488   // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9489   // instruction to sign-extend since SEW>XLEN.
9490   if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9491     ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9492     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9493   }
9494 
9495   switch (IntNo) {
9496   case Intrinsic::riscv_vslide1up:
9497   case Intrinsic::riscv_vslide1down:
9498   case Intrinsic::riscv_vslide1up_mask:
9499   case Intrinsic::riscv_vslide1down_mask: {
9500     // We need to special case these when the scalar is larger than XLen.
9501     unsigned NumOps = Op.getNumOperands();
9502     bool IsMasked = NumOps == 7;
9503 
9504     // Convert the vector source to the equivalent nxvXi32 vector.
9505     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9506     SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9507     SDValue ScalarLo, ScalarHi;
9508     std::tie(ScalarLo, ScalarHi) =
9509         DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9510 
9511     // Double the VL since we halved SEW.
9512     SDValue AVL = getVLOperand(Op);
9513     SDValue I32VL;
9514 
9515     // Optimize for constant AVL
9516     if (isa<ConstantSDNode>(AVL)) {
9517       const auto [MinVLMAX, MaxVLMAX] =
9518           RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9519 
9520       uint64_t AVLInt = AVL->getAsZExtVal();
9521       if (AVLInt <= MinVLMAX) {
9522         I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9523       } else if (AVLInt >= 2 * MaxVLMAX) {
9524         // Just set vl to VLMAX in this situation
9525         I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9526       } else {
9527         // For AVL between (MinVLMAX, 2 * MaxVLMAX), the vl that actually gets
9528         // used depends on the hardware implementation, so let the vsetvli-based
9529         // code below handle it.
9530       }
9531     }
9532     if (!I32VL) {
9533       RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9534       SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9535       unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9536       SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9537       SDValue SETVL =
9538           DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9539       // Use the vsetvli instruction to get the actually-used length, which is
9540       // related to the hardware implementation.
9541       SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9542                                SEW, LMUL);
9543       I32VL =
9544           DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9545     }
9546 
9547     SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9548 
9549     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9550     // instructions.
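    // Note the ordering below: vslide1up inserts at element 0, so the hi half
    // goes in first and the lo half second, while vslide1down inserts at the
    // top, so lo goes first and hi second. Either way each i64 element ends up
    // as {lo, hi} in the i32 view.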
9551     SDValue Passthru;
9552     if (IsMasked)
9553       Passthru = DAG.getUNDEF(I32VT);
9554     else
9555       Passthru = DAG.getBitcast(I32VT, Operands[1]);
9556 
9557     if (IntNo == Intrinsic::riscv_vslide1up ||
9558         IntNo == Intrinsic::riscv_vslide1up_mask) {
9559       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9560                         ScalarHi, I32Mask, I32VL);
9561       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9562                         ScalarLo, I32Mask, I32VL);
9563     } else {
9564       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9565                         ScalarLo, I32Mask, I32VL);
9566       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9567                         ScalarHi, I32Mask, I32VL);
9568     }
9569 
9570     // Convert back to nxvXi64.
9571     Vec = DAG.getBitcast(VT, Vec);
9572 
9573     if (!IsMasked)
9574       return Vec;
9575     // Apply mask after the operation.
9576     SDValue Mask = Operands[NumOps - 3];
9577     SDValue MaskedOff = Operands[1];
9578     // Assume Policy operand is the last operand.
9579     uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9580     // We don't need to select maskedoff if it's undef.
9581     if (MaskedOff.isUndef())
9582       return Vec;
9583     // TAMU
9584     if (Policy == RISCVII::TAIL_AGNOSTIC)
9585       return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9586                          DAG.getUNDEF(VT), AVL);
9587     // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9588     // It's fine because vmerge does not care about the mask policy.
9589     return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9590                        MaskedOff, AVL);
9591   }
9592   }
9593 
9594   // We need to convert the scalar to a splat vector.
9595   SDValue VL = getVLOperand(Op);
9596   assert(VL.getValueType() == XLenVT);
9597   ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9598   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9599 }
9600 
9601 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9602 // scalable vector llvm.get.vector.length for now.
9603 //
9604 // We need to convert from a scalable VF to a vsetvli with VLMax equal to
9605 // (vscale * VF). The vscale and VF are independent of element width. We use
9606 // SEW=8 for the vsetvli because it is the only element width that supports all
9607 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9608 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9609 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9610 // SEW and LMUL are better for the surrounding vector instructions.
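// For example (a rough sketch): with RVVBitsPerBlock=64, LMul1VF is 8 at
// SEW=8, so a scalable VF of 4 is the fractional LMUL 1/2 and we emit
// approximately
//   vsetvli rd, cnt, e8, mf2
// before truncating the result back to the intrinsic's return type.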
9611 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9612                                     const RISCVSubtarget &Subtarget) {
9613   MVT XLenVT = Subtarget.getXLenVT();
9614 
9615   // The smallest LMUL is only valid for the smallest element width.
9616   const unsigned ElementWidth = 8;
9617 
9618   // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9619   unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9620   // We don't support VF==1 with ELEN==32.
9621   [[maybe_unused]] unsigned MinVF =
9622       RISCV::RVVBitsPerBlock / Subtarget.getELen();
9623 
9624   [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9625   assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9626          "Unexpected VF");
9627 
9628   bool Fractional = VF < LMul1VF;
9629   unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9630   unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9631   unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9632 
9633   SDLoc DL(N);
9634 
9635   SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9636   SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9637 
9638   SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9639 
9640   SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9641   SDValue Res =
9642       DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9643   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9644 }
9645 
9646 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9647                              const RISCVSubtarget &Subtarget) {
9648   SDValue Op0 = N->getOperand(1);
9649   MVT OpVT = Op0.getSimpleValueType();
9650   MVT ContainerVT = OpVT;
9651   if (OpVT.isFixedLengthVector()) {
9652     ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9653     Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9654   }
9655   MVT XLenVT = Subtarget.getXLenVT();
9656   SDLoc DL(N);
9657   auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9658   SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9659   if (isOneConstant(N->getOperand(2)))
9660     return Res;
9661 
9662   // Convert -1 to VL.
9663   SDValue Setcc =
9664       DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9665   VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9666   return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9667 }
9668 
9669 static inline void promoteVCIXScalar(const SDValue &Op,
9670                                      SmallVectorImpl<SDValue> &Operands,
9671                                      SelectionDAG &DAG) {
9672   const RISCVSubtarget &Subtarget =
9673       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9674 
9675   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9676                   Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9677   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9678   SDLoc DL(Op);
9679 
9680   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9681       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9682   if (!II || !II->hasScalarOperand())
9683     return;
9684 
9685   unsigned SplatOp = II->ScalarOperand + 1;
9686   assert(SplatOp < Op.getNumOperands());
9687 
9688   SDValue &ScalarOp = Operands[SplatOp];
9689   MVT OpVT = ScalarOp.getSimpleValueType();
9690   MVT XLenVT = Subtarget.getXLenVT();
9691 
9692   // The code below is partially copied from lowerVectorIntrinsicScalars.
9693   // If this isn't a scalar, or its type is XLenVT, we're done.
9694   if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9695     return;
9696 
9697   // Manually emit promote operation for scalar operation.
9698   // Manually promote the scalar operand.
9699     unsigned ExtOpc =
9700         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9701     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9702   }
9703 }
9704 
9705 static void processVCIXOperands(SDValue &OrigOp,
9706                                 SmallVectorImpl<SDValue> &Operands,
9707                                 SelectionDAG &DAG) {
9708   promoteVCIXScalar(OrigOp, Operands, DAG);
9709   const RISCVSubtarget &Subtarget =
9710       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9711   for (SDValue &V : Operands) {
9712     EVT ValType = V.getValueType();
9713     if (ValType.isVector() && ValType.isFloatingPoint()) {
9714       MVT InterimIVT =
9715           MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9716                            ValType.getVectorElementCount());
9717       V = DAG.getBitcast(InterimIVT, V);
9718     }
9719     if (ValType.isFixedLengthVector()) {
9720       MVT OpContainerVT = getContainerForFixedLengthVector(
9721           DAG, V.getSimpleValueType(), Subtarget);
9722       V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9723     }
9724   }
9725 }
9726 
9727 // LMUL * VLEN should be greater than or equal to EGS * SEW
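// A worked example, assuming Zvl128b (RealMinVLen = 128): for VT = nxv4i32
// (128 bits known-minimum), 128 * 128 / 64 = 256 >= 4 * 32 = 128, so an
// element group size of 4 is valid for that type.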
9728 static inline bool isValidEGW(int EGS, EVT VT,
9729                               const RISCVSubtarget &Subtarget) {
9730   return (Subtarget.getRealMinVLen() *
9731              VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9732          EGS * VT.getScalarSizeInBits();
9733 }
9734 
9735 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9736                                                      SelectionDAG &DAG) const {
9737   unsigned IntNo = Op.getConstantOperandVal(0);
9738   SDLoc DL(Op);
9739   MVT XLenVT = Subtarget.getXLenVT();
9740 
9741   switch (IntNo) {
9742   default:
9743     break; // Don't custom lower most intrinsics.
9744   case Intrinsic::riscv_tuple_insert: {
9745     SDValue Vec = Op.getOperand(1);
9746     SDValue SubVec = Op.getOperand(2);
9747     SDValue Index = Op.getOperand(3);
9748 
9749     return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9750                        SubVec, Index);
9751   }
9752   case Intrinsic::riscv_tuple_extract: {
9753     SDValue Vec = Op.getOperand(1);
9754     SDValue Index = Op.getOperand(2);
9755 
9756     return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9757                        Index);
9758   }
9759   case Intrinsic::thread_pointer: {
9760     EVT PtrVT = getPointerTy(DAG.getDataLayout());
9761     return DAG.getRegister(RISCV::X4, PtrVT);
9762   }
9763   case Intrinsic::riscv_orc_b:
9764   case Intrinsic::riscv_brev8:
9765   case Intrinsic::riscv_sha256sig0:
9766   case Intrinsic::riscv_sha256sig1:
9767   case Intrinsic::riscv_sha256sum0:
9768   case Intrinsic::riscv_sha256sum1:
9769   case Intrinsic::riscv_sm3p0:
9770   case Intrinsic::riscv_sm3p1: {
9771     unsigned Opc;
9772     switch (IntNo) {
9773     case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
9774     case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
9775     case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9776     case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9777     case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9778     case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9779     case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
9780     case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
9781     }
9782 
9783     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9784   }
9785   case Intrinsic::riscv_sm4ks:
9786   case Intrinsic::riscv_sm4ed: {
9787     unsigned Opc =
9788         IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9789 
9790     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9791                        Op.getOperand(3));
9792   }
9793   case Intrinsic::riscv_zip:
9794   case Intrinsic::riscv_unzip: {
9795     unsigned Opc =
9796         IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9797     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9798   }
9799   case Intrinsic::riscv_mopr:
9800     return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9801                        Op.getOperand(2));
9802 
9803   case Intrinsic::riscv_moprr: {
9804     return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9805                        Op.getOperand(2), Op.getOperand(3));
9806   }
9807   case Intrinsic::riscv_clmul:
9808     return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9809                        Op.getOperand(2));
9810   case Intrinsic::riscv_clmulh:
9811   case Intrinsic::riscv_clmulr: {
9812     unsigned Opc =
9813         IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9814     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9815   }
9816   case Intrinsic::experimental_get_vector_length:
9817     return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9818   case Intrinsic::experimental_cttz_elts:
9819     return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9820   case Intrinsic::riscv_vmv_x_s: {
9821     SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9822     return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9823   }
9824   case Intrinsic::riscv_vfmv_f_s:
9825     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9826                        Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9827   case Intrinsic::riscv_vmv_v_x:
9828     return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9829                             Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9830                             Subtarget);
9831   case Intrinsic::riscv_vfmv_v_f:
9832     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9833                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9834   case Intrinsic::riscv_vmv_s_x: {
9835     SDValue Scalar = Op.getOperand(2);
9836 
9837     if (Scalar.getValueType().bitsLE(XLenVT)) {
9838       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9839       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9840                          Op.getOperand(1), Scalar, Op.getOperand(3));
9841     }
9842 
9843     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9844 
9845     // This is an i64 value that lives in two scalar registers. We have to
9846     // insert this in a convoluted way. First we build a vXi64 splat containing
9847     // the two values that we assemble using some bit math. Next we'll use
9848     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9849     // to merge element 0 from our splat into the source vector.
9850     // FIXME: This is probably not the best way to do this, but it is
9851     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9852     // point.
9853     //   sw lo, (a0)
9854     //   sw hi, 4(a0)
9855     //   vlse vX, (a0)
9856     //
9857     //   vid.v      vVid
9858     //   vmseq.vx   mMask, vVid, 0
9859     //   vmerge.vvm vDest, vSrc, vVal, mMask
9860     MVT VT = Op.getSimpleValueType();
9861     SDValue Vec = Op.getOperand(1);
9862     SDValue VL = getVLOperand(Op);
9863 
9864     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9865     if (Op.getOperand(1).isUndef())
9866       return SplattedVal;
9867     SDValue SplattedIdx =
9868         DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9869                     DAG.getConstant(0, DL, MVT::i32), VL);
9870 
9871     MVT MaskVT = getMaskTypeFor(VT);
9872     SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9873     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9874     SDValue SelectCond =
9875         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9876                     {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9877                      DAG.getUNDEF(MaskVT), Mask, VL});
9878     return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9879                        Vec, DAG.getUNDEF(VT), VL);
9880   }
9881   case Intrinsic::riscv_vfmv_s_f:
9882     return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9883                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9884   // EGS * EEW >= 128 bits
9885   case Intrinsic::riscv_vaesdf_vv:
9886   case Intrinsic::riscv_vaesdf_vs:
9887   case Intrinsic::riscv_vaesdm_vv:
9888   case Intrinsic::riscv_vaesdm_vs:
9889   case Intrinsic::riscv_vaesef_vv:
9890   case Intrinsic::riscv_vaesef_vs:
9891   case Intrinsic::riscv_vaesem_vv:
9892   case Intrinsic::riscv_vaesem_vs:
9893   case Intrinsic::riscv_vaeskf1:
9894   case Intrinsic::riscv_vaeskf2:
9895   case Intrinsic::riscv_vaesz_vs:
9896   case Intrinsic::riscv_vsm4k:
9897   case Intrinsic::riscv_vsm4r_vv:
9898   case Intrinsic::riscv_vsm4r_vs: {
9899     if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9900         !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9901         !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9902       report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9903     return Op;
9904   }
9905   // EGS * EEW >= 256 bits
9906   case Intrinsic::riscv_vsm3c:
9907   case Intrinsic::riscv_vsm3me: {
9908     if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9909         !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9910       report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9911     return Op;
9912   }
9913   // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9914   case Intrinsic::riscv_vsha2ch:
9915   case Intrinsic::riscv_vsha2cl:
9916   case Intrinsic::riscv_vsha2ms: {
9917     if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9918         !Subtarget.hasStdExtZvknhb())
9919       report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9920     if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9921         !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9922         !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9923       report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9924     return Op;
9925   }
9926   case Intrinsic::riscv_sf_vc_v_x:
9927   case Intrinsic::riscv_sf_vc_v_i:
9928   case Intrinsic::riscv_sf_vc_v_xv:
9929   case Intrinsic::riscv_sf_vc_v_iv:
9930   case Intrinsic::riscv_sf_vc_v_vv:
9931   case Intrinsic::riscv_sf_vc_v_fv:
9932   case Intrinsic::riscv_sf_vc_v_xvv:
9933   case Intrinsic::riscv_sf_vc_v_ivv:
9934   case Intrinsic::riscv_sf_vc_v_vvv:
9935   case Intrinsic::riscv_sf_vc_v_fvv:
9936   case Intrinsic::riscv_sf_vc_v_xvw:
9937   case Intrinsic::riscv_sf_vc_v_ivw:
9938   case Intrinsic::riscv_sf_vc_v_vvw:
9939   case Intrinsic::riscv_sf_vc_v_fvw: {
9940     MVT VT = Op.getSimpleValueType();
9941 
9942     SmallVector<SDValue> Operands{Op->op_values()};
9943     processVCIXOperands(Op, Operands, DAG);
9944 
9945     MVT RetVT = VT;
9946     if (VT.isFixedLengthVector())
9947       RetVT = getContainerForFixedLengthVector(VT);
9948     else if (VT.isFloatingPoint())
9949       RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9950                                VT.getVectorElementCount());
9951 
9952     SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9953 
9954     if (VT.isFixedLengthVector())
9955       NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9956     else if (VT.isFloatingPoint())
9957       NewNode = DAG.getBitcast(VT, NewNode);
9958 
9959     if (Op == NewNode)
9960       break;
9961 
9962     return NewNode;
9963   }
9964   }
9965 
9966   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9967 }
9968 
9969 static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9970                                            unsigned Type) {
9971   SDLoc DL(Op);
9972   SmallVector<SDValue> Operands{Op->op_values()};
9973   Operands.erase(Operands.begin() + 1);
9974 
9975   const RISCVSubtarget &Subtarget =
9976       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9977   MVT VT = Op.getSimpleValueType();
9978   MVT RetVT = VT;
9979   MVT FloatVT = VT;
9980 
9981   if (VT.isFloatingPoint()) {
9982     RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9983                              VT.getVectorElementCount());
9984     FloatVT = RetVT;
9985   }
9986   if (VT.isFixedLengthVector())
9987     RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
9988                                              Subtarget);
9989 
9990   processVCIXOperands(Op, Operands, DAG);
9991 
9992   SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9993   SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9994   SDValue Chain = NewNode.getValue(1);
9995 
9996   if (VT.isFixedLengthVector())
9997     NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9998   if (VT.isFloatingPoint())
9999     NewNode = DAG.getBitcast(VT, NewNode);
10000 
10001   NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
10002 
10003   return NewNode;
10004 }
10005 
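      // Lower a side-effect-only SiFive VCIX intrinsic (ISD::INTRINSIC_VOID) to
      // the RISCVISD VCIX node given by Type, dropping the intrinsic ID operand.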
10006 static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
10007                                          unsigned Type) {
10008   SmallVector<SDValue> Operands{Op->op_values()};
10009   Operands.erase(Operands.begin() + 1);
10010   processVCIXOperands(Op, Operands, DAG);
10011 
10012   return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
10013 }
10014 
10015 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
10016                                                     SelectionDAG &DAG) const {
10017   unsigned IntNo = Op.getConstantOperandVal(1);
10018   switch (IntNo) {
10019   default:
10020     break;
10021   case Intrinsic::riscv_seg2_load:
10022   case Intrinsic::riscv_seg3_load:
10023   case Intrinsic::riscv_seg4_load:
10024   case Intrinsic::riscv_seg5_load:
10025   case Intrinsic::riscv_seg6_load:
10026   case Intrinsic::riscv_seg7_load:
10027   case Intrinsic::riscv_seg8_load: {
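          // Lower the fixed-length segment load to a vlseg<NF> on a RISC-V
          // vector tuple type, then unpack each field with TUPLE_EXTRACT and
          // convert it back to the original fixed-length vector type.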
10028     SDLoc DL(Op);
10029     static const Intrinsic::ID VlsegInts[7] = {
10030         Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10031         Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10032         Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10033         Intrinsic::riscv_vlseg8};
10034     unsigned NF = Op->getNumValues() - 1;
10035     assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10036     MVT XLenVT = Subtarget.getXLenVT();
10037     MVT VT = Op->getSimpleValueType(0);
10038     MVT ContainerVT = getContainerForFixedLengthVector(VT);
10039     unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10040                   ContainerVT.getScalarSizeInBits();
10041     EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10042 
10043     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10044     SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10045     auto *Load = cast<MemIntrinsicSDNode>(Op);
10046 
10047     SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
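          // Operand layout: (chain, intrinsic-id, passthru tuple, base pointer,
          // VL, log2(SEW)).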
10048     SDValue Ops[] = {
10049         Load->getChain(),
10050         IntID,
10051         DAG.getUNDEF(VecTupTy),
10052         Op.getOperand(2),
10053         VL,
10054         DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10055     SDValue Result =
10056         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10057                                 Load->getMemoryVT(), Load->getMemOperand());
10058     SmallVector<SDValue, 9> Results;
10059     for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10060       SDValue SubVec =
10061           DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10062                       Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10063       Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10064     }
10065     Results.push_back(Result.getValue(1));
10066     return DAG.getMergeValues(Results, DL);
10067   }
10068   case Intrinsic::riscv_sf_vc_v_x_se:
10069     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
10070   case Intrinsic::riscv_sf_vc_v_i_se:
10071     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
10072   case Intrinsic::riscv_sf_vc_v_xv_se:
10073     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
10074   case Intrinsic::riscv_sf_vc_v_iv_se:
10075     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
10076   case Intrinsic::riscv_sf_vc_v_vv_se:
10077     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
10078   case Intrinsic::riscv_sf_vc_v_fv_se:
10079     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
10080   case Intrinsic::riscv_sf_vc_v_xvv_se:
10081     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
10082   case Intrinsic::riscv_sf_vc_v_ivv_se:
10083     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
10084   case Intrinsic::riscv_sf_vc_v_vvv_se:
10085     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
10086   case Intrinsic::riscv_sf_vc_v_fvv_se:
10087     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
10088   case Intrinsic::riscv_sf_vc_v_xvw_se:
10089     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
10090   case Intrinsic::riscv_sf_vc_v_ivw_se:
10091     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
10092   case Intrinsic::riscv_sf_vc_v_vvw_se:
10093     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
10094   case Intrinsic::riscv_sf_vc_v_fvw_se:
10095     return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
10096   }
10097 
10098   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10099 }
10100 
10101 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10102                                                  SelectionDAG &DAG) const {
10103   unsigned IntNo = Op.getConstantOperandVal(1);
10104   switch (IntNo) {
10105   default:
10106     break;
10107   case Intrinsic::riscv_seg2_store:
10108   case Intrinsic::riscv_seg3_store:
10109   case Intrinsic::riscv_seg4_store:
10110   case Intrinsic::riscv_seg5_store:
10111   case Intrinsic::riscv_seg6_store:
10112   case Intrinsic::riscv_seg7_store:
10113   case Intrinsic::riscv_seg8_store: {
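          // Lower the fixed-length segment store by packing the NF source
          // vectors into a RISC-V vector tuple with TUPLE_INSERT and emitting
          // a vsseg<NF> memory intrinsic on that tuple.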
10114     SDLoc DL(Op);
10115     static const Intrinsic::ID VssegInts[] = {
10116         Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10117         Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10118         Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10119         Intrinsic::riscv_vsseg8};
10120     // Operands are (chain, int_id, vec*, ptr, vl)
10121     unsigned NF = Op->getNumOperands() - 4;
10122     assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10123     MVT XLenVT = Subtarget.getXLenVT();
10124     MVT VT = Op->getOperand(2).getSimpleValueType();
10125     MVT ContainerVT = getContainerForFixedLengthVector(VT);
10126     unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10127                   ContainerVT.getScalarSizeInBits();
10128     EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10129 
10130     SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10131     SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10132     SDValue Ptr = Op->getOperand(NF + 2);
10133 
10134     auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10135 
10136     SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10137     for (unsigned i = 0; i < NF; i++)
10138       StoredVal = DAG.getNode(
10139           RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10140           convertToScalableVector(
10141               ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10142           DAG.getVectorIdxConstant(i, DL));
10143 
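          // Operand layout: (chain, intrinsic-id, stored tuple, base pointer,
          // VL, log2(SEW)).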
10144     SDValue Ops[] = {
10145         FixedIntrinsic->getChain(),
10146         IntID,
10147         StoredVal,
10148         Ptr,
10149         VL,
10150         DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10151 
10152     return DAG.getMemIntrinsicNode(
10153         ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10154         FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10155   }
10156   case Intrinsic::riscv_sf_vc_xv_se:
10157     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10158   case Intrinsic::riscv_sf_vc_iv_se:
10159     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10160   case Intrinsic::riscv_sf_vc_vv_se:
10161     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10162   case Intrinsic::riscv_sf_vc_fv_se:
10163     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10164   case Intrinsic::riscv_sf_vc_xvv_se:
10165     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10166   case Intrinsic::riscv_sf_vc_ivv_se:
10167     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10168   case Intrinsic::riscv_sf_vc_vvv_se:
10169     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10170   case Intrinsic::riscv_sf_vc_fvv_se:
10171     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10172   case Intrinsic::riscv_sf_vc_xvw_se:
10173     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10174   case Intrinsic::riscv_sf_vc_ivw_se:
10175     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10176   case Intrinsic::riscv_sf_vc_vvw_se:
10177     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10178   case Intrinsic::riscv_sf_vc_fvw_se:
10179     return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10180   }
10181 
10182   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10183 }
10184 
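      // Map a (VP_)VECREDUCE_* ISD opcode to the corresponding
      // RISCVISD::VECREDUCE_*_VL opcode.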
10185 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10186   switch (ISDOpcode) {
10187   default:
10188     llvm_unreachable("Unhandled reduction");
10189   case ISD::VP_REDUCE_ADD:
10190   case ISD::VECREDUCE_ADD:
10191     return RISCVISD::VECREDUCE_ADD_VL;
10192   case ISD::VP_REDUCE_UMAX:
10193   case ISD::VECREDUCE_UMAX:
10194     return RISCVISD::VECREDUCE_UMAX_VL;
10195   case ISD::VP_REDUCE_SMAX:
10196   case ISD::VECREDUCE_SMAX:
10197     return RISCVISD::VECREDUCE_SMAX_VL;
10198   case ISD::VP_REDUCE_UMIN:
10199   case ISD::VECREDUCE_UMIN:
10200     return RISCVISD::VECREDUCE_UMIN_VL;
10201   case ISD::VP_REDUCE_SMIN:
10202   case ISD::VECREDUCE_SMIN:
10203     return RISCVISD::VECREDUCE_SMIN_VL;
10204   case ISD::VP_REDUCE_AND:
10205   case ISD::VECREDUCE_AND:
10206     return RISCVISD::VECREDUCE_AND_VL;
10207   case ISD::VP_REDUCE_OR:
10208   case ISD::VECREDUCE_OR:
10209     return RISCVISD::VECREDUCE_OR_VL;
10210   case ISD::VP_REDUCE_XOR:
10211   case ISD::VECREDUCE_XOR:
10212     return RISCVISD::VECREDUCE_XOR_VL;
10213   case ISD::VP_REDUCE_FADD:
10214     return RISCVISD::VECREDUCE_FADD_VL;
10215   case ISD::VP_REDUCE_SEQ_FADD:
10216     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10217   case ISD::VP_REDUCE_FMAX:
10218   case ISD::VP_REDUCE_FMAXIMUM:
10219     return RISCVISD::VECREDUCE_FMAX_VL;
10220   case ISD::VP_REDUCE_FMIN:
10221   case ISD::VP_REDUCE_FMINIMUM:
10222     return RISCVISD::VECREDUCE_FMIN_VL;
10223   }
10224 
10225 }
10226 
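      // Lower a reduction of an i1 mask vector (and/or/xor, plain or VP form)
      // using vcpop.m:
      //   and -> vcpop(~x) == 0, or -> vcpop(x) != 0, xor -> (vcpop(x) & 1) != 0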
10227 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10228                                                          SelectionDAG &DAG,
10229                                                          bool IsVP) const {
10230   SDLoc DL(Op);
10231   SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10232   MVT VecVT = Vec.getSimpleValueType();
10233   assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10234           Op.getOpcode() == ISD::VECREDUCE_OR ||
10235           Op.getOpcode() == ISD::VECREDUCE_XOR ||
10236           Op.getOpcode() == ISD::VP_REDUCE_AND ||
10237           Op.getOpcode() == ISD::VP_REDUCE_OR ||
10238           Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10239          "Unexpected reduction lowering");
10240 
10241   MVT XLenVT = Subtarget.getXLenVT();
10242 
10243   MVT ContainerVT = VecVT;
10244   if (VecVT.isFixedLengthVector()) {
10245     ContainerVT = getContainerForFixedLengthVector(VecVT);
10246     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10247   }
10248 
10249   SDValue Mask, VL;
10250   if (IsVP) {
10251     Mask = Op.getOperand(2);
10252     VL = Op.getOperand(3);
10253   } else {
10254     std::tie(Mask, VL) =
10255         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10256   }
10257 
10258   ISD::CondCode CC;
10259   switch (Op.getOpcode()) {
10260   default:
10261     llvm_unreachable("Unhandled reduction");
10262   case ISD::VECREDUCE_AND:
10263   case ISD::VP_REDUCE_AND: {
10264     // vcpop ~x == 0
10265     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10266     if (IsVP || VecVT.isFixedLengthVector())
10267       Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10268     else
10269       Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10270     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10271     CC = ISD::SETEQ;
10272     break;
10273   }
10274   case ISD::VECREDUCE_OR:
10275   case ISD::VP_REDUCE_OR:
10276     // vcpop x != 0
10277     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10278     CC = ISD::SETNE;
10279     break;
10280   case ISD::VECREDUCE_XOR:
10281   case ISD::VP_REDUCE_XOR: {
10282     // ((vcpop x) & 1) != 0
10283     SDValue One = DAG.getConstant(1, DL, XLenVT);
10284     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10285     Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10286     CC = ISD::SETNE;
10287     break;
10288   }
10289   }
10290 
10291   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10292   SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10293   SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10294 
10295   if (!IsVP)
10296     return SetCC;
10297 
10298   // Now include the start value in the operation.
10299   // Note that we must return the start value when no elements are operated
10300   // upon. The vcpop instructions we've emitted in each case above will return
10301   // 0 for an inactive vector, and so we've already received the neutral value:
10302   // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10303   // can simply include the start value.
10304   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10305   return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10306 }
10307 
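      // Return true if the AVL is provably non-zero: either the X0 sentinel
      // register (denoting VLMAX) or a constant immediate >= 1.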
10308 static bool isNonZeroAVL(SDValue AVL) {
10309   auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10310   auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10311   return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10312          (ImmAVL && ImmAVL->getZExtValue() >= 1);
10313 }
10314 
10315 /// Helper to lower a reduction sequence of the form:
10316 /// scalar = reduce_op vec, scalar_start
10317 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10318                                  SDValue StartValue, SDValue Vec, SDValue Mask,
10319                                  SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10320                                  const RISCVSubtarget &Subtarget) {
10321   const MVT VecVT = Vec.getSimpleValueType();
10322   const MVT M1VT = getLMUL1VT(VecVT);
10323   const MVT XLenVT = Subtarget.getXLenVT();
10324   const bool NonZeroAVL = isNonZeroAVL(VL);
10325 
10326   // The reduction needs an LMUL1 input; do the splat at either LMUL1
10327   // or the original VT if fractional.
10328   auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10329   // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10330   // prove it is non-zero.  For the AVL=0 case, we need the scalar to
10331   // be the result of the reduction operation.
10332   auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10333   SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10334                                            DAG, Subtarget);
10335   if (M1VT != InnerVT)
10336     InitialValue =
10337         DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10338                     InitialValue, DAG.getVectorIdxConstant(0, DL));
10339   SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10340   SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10341   SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10342   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10343   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10344                      DAG.getVectorIdxConstant(0, DL));
10345 }
10346 
10347 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10348                                             SelectionDAG &DAG) const {
10349   SDLoc DL(Op);
10350   SDValue Vec = Op.getOperand(0);
10351   EVT VecEVT = Vec.getValueType();
10352 
10353   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10354 
10355   // Due to ordering in legalize types we may have a vector type that needs to
10356   // be split. Do that manually so we can get down to a legal type.
10357   while (getTypeAction(*DAG.getContext(), VecEVT) ==
10358          TargetLowering::TypeSplitVector) {
10359     auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10360     VecEVT = Lo.getValueType();
10361     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10362   }
10363 
10364   // TODO: The type may need to be widened rather than split. Or widened before
10365   // it can be split.
10366   if (!isTypeLegal(VecEVT))
10367     return SDValue();
10368 
10369   MVT VecVT = VecEVT.getSimpleVT();
10370   MVT VecEltVT = VecVT.getVectorElementType();
10371   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10372 
10373   MVT ContainerVT = VecVT;
10374   if (VecVT.isFixedLengthVector()) {
10375     ContainerVT = getContainerForFixedLengthVector(VecVT);
10376     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10377   }
10378 
10379   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10380 
10381   SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
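        // The operations below are idempotent, so we can seed the reduction
        // with element 0 of the source vector instead of a neutral-element
        // constant.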
10382   switch (BaseOpc) {
10383   case ISD::AND:
10384   case ISD::OR:
10385   case ISD::UMAX:
10386   case ISD::UMIN:
10387   case ISD::SMAX:
10388   case ISD::SMIN:
10389     StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10390                          DAG.getVectorIdxConstant(0, DL));
10391   }
10392   return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10393                            Mask, VL, DL, DAG, Subtarget);
10394 }
10395 
10396 // Given a reduction op, this function returns the matching reduction opcode,
10397 // the vector SDValue and the scalar SDValue required to lower this to a
10398 // RISCVISD node.
10399 static std::tuple<unsigned, SDValue, SDValue>
10400 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
10401                                const RISCVSubtarget &Subtarget) {
10402   SDLoc DL(Op);
10403   auto Flags = Op->getFlags();
10404   unsigned Opcode = Op.getOpcode();
10405   switch (Opcode) {
10406   default:
10407     llvm_unreachable("Unhandled reduction");
10408   case ISD::VECREDUCE_FADD: {
10409     // Use positive zero if we can. It is cheaper to materialize.
10410     SDValue Zero =
10411         DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10412     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10413   }
10414   case ISD::VECREDUCE_SEQ_FADD:
10415     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10416                            Op.getOperand(0));
10417   case ISD::VECREDUCE_FMINIMUM:
10418   case ISD::VECREDUCE_FMAXIMUM:
10419   case ISD::VECREDUCE_FMIN:
10420   case ISD::VECREDUCE_FMAX: {
10421     SDValue Front =
10422         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10423                     DAG.getVectorIdxConstant(0, DL));
10424     unsigned RVVOpc =
10425         (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10426             ? RISCVISD::VECREDUCE_FMIN_VL
10427             : RISCVISD::VECREDUCE_FMAX_VL;
10428     return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10429   }
10430   }
10431 }
10432 
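      // Lower a floating-point VECREDUCE_* node. For FMINIMUM/FMAXIMUM the
      // result must additionally be forced to NaN when any input element is
      // NaN (unless the nnan flag is present).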
10433 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10434                                               SelectionDAG &DAG) const {
10435   SDLoc DL(Op);
10436   MVT VecEltVT = Op.getSimpleValueType();
10437 
10438   unsigned RVVOpcode;
10439   SDValue VectorVal, ScalarVal;
10440   std::tie(RVVOpcode, VectorVal, ScalarVal) =
10441       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10442   MVT VecVT = VectorVal.getSimpleValueType();
10443 
10444   MVT ContainerVT = VecVT;
10445   if (VecVT.isFixedLengthVector()) {
10446     ContainerVT = getContainerForFixedLengthVector(VecVT);
10447     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10448   }
10449 
10450   MVT ResVT = Op.getSimpleValueType();
10451   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10452   SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10453                                   VL, DL, DAG, Subtarget);
10454   if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10455       Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10456     return Res;
10457 
10458   if (Op->getFlags().hasNoNaNs())
10459     return Res;
10460 
10461   // Force the output to NaN if any element is NaN.
10462   SDValue IsNan =
10463       DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10464                   {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10465                    DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10466   MVT XLenVT = Subtarget.getXLenVT();
10467   SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10468   SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10469                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10470   return DAG.getSelect(
10471       DL, ResVT, NoNaNs, Res,
10472       DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10473 }
10474 
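      // Lower a VP reduction to an RVV reduction. For VP_REDUCE_FMINIMUM and
      // VP_REDUCE_FMAXIMUM, a NaN in the start value or in any active vector
      // element must additionally propagate to the result.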
10475 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10476                                            SelectionDAG &DAG) const {
10477   SDLoc DL(Op);
10478   unsigned Opc = Op.getOpcode();
10479   SDValue Start = Op.getOperand(0);
10480   SDValue Vec = Op.getOperand(1);
10481   EVT VecEVT = Vec.getValueType();
10482   MVT XLenVT = Subtarget.getXLenVT();
10483 
10484   // TODO: The type may need to be widened rather than split. Or widened before
10485   // it can be split.
10486   if (!isTypeLegal(VecEVT))
10487     return SDValue();
10488 
10489   MVT VecVT = VecEVT.getSimpleVT();
10490   unsigned RVVOpcode = getRVVReductionOp(Opc);
10491 
10492   if (VecVT.isFixedLengthVector()) {
10493     auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10494     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10495   }
10496 
10497   SDValue VL = Op.getOperand(3);
10498   SDValue Mask = Op.getOperand(2);
10499   SDValue Res =
10500       lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10501                         Vec, Mask, VL, DL, DAG, Subtarget);
10502   if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10503       Op->getFlags().hasNoNaNs())
10504     return Res;
10505 
10506   // Propagate NaNs.
10507   MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10508   // Check if any of the elements in Vec is NaN.
10509   SDValue IsNaN = DAG.getNode(
10510       RISCVISD::SETCC_VL, DL, PredVT,
10511       {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10512   SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10513   // Check if the start value is NaN.
10514   SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10515   VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10516   SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10517                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10518   MVT ResVT = Res.getSimpleValueType();
10519   return DAG.getSelect(
10520       DL, ResVT, NoNaNs, Res,
10521       DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10522 }
10523 
10524 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10525                                                    SelectionDAG &DAG) const {
10526   SDValue Vec = Op.getOperand(0);
10527   SDValue SubVec = Op.getOperand(1);
10528   MVT VecVT = Vec.getSimpleValueType();
10529   MVT SubVecVT = SubVec.getSimpleValueType();
10530 
10531   SDLoc DL(Op);
10532   MVT XLenVT = Subtarget.getXLenVT();
10533   unsigned OrigIdx = Op.getConstantOperandVal(2);
10534   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10535 
10536   if (OrigIdx == 0 && Vec.isUndef())
10537     return Op;
10538 
10539   // We don't have the ability to slide mask vectors up indexed by their i1
10540   // elements; the smallest we can do is i8. Often we are able to bitcast to
10541   // equivalent i8 vectors. Note that when inserting a fixed-length vector
10542   // into a scalable one, we might not necessarily have enough scalable
10543   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10544   if (SubVecVT.getVectorElementType() == MVT::i1) {
10545     if (VecVT.getVectorMinNumElements() >= 8 &&
10546         SubVecVT.getVectorMinNumElements() >= 8) {
10547       assert(OrigIdx % 8 == 0 && "Invalid index");
10548       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10549              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10550              "Unexpected mask vector lowering");
10551       OrigIdx /= 8;
10552       SubVecVT =
10553           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10554                            SubVecVT.isScalableVector());
10555       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10556                                VecVT.isScalableVector());
10557       Vec = DAG.getBitcast(VecVT, Vec);
10558       SubVec = DAG.getBitcast(SubVecVT, SubVec);
10559     } else {
10560       // We can't slide this mask vector up indexed by its i1 elements.
10561       // This poses a problem when we wish to insert a scalable vector which
10562       // can't be re-expressed as a larger type. Just choose the slow path and
10563       // extend to a larger type, then truncate back down.
10564       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10565       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10566       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10567       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10568       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10569                         Op.getOperand(2));
10570       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10571       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10572     }
10573   }
10574 
10575   // If the subvector is a fixed-length type and we don't know VLEN exactly,
10576   // we cannot use subregister manipulation to simplify the codegen; we don't
10577   // know which register of an LMUL group contains the specific subvector, as
10578   // we only know the minimum register size. Therefore we must slide the
10579   // vector group up the full amount.
10580   const auto VLen = Subtarget.getRealVLen();
10581   if (SubVecVT.isFixedLengthVector() && !VLen) {
10582     MVT ContainerVT = VecVT;
10583     if (VecVT.isFixedLengthVector()) {
10584       ContainerVT = getContainerForFixedLengthVector(VecVT);
10585       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10586     }
10587 
10588     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10589                          DAG.getUNDEF(ContainerVT), SubVec,
10590                          DAG.getVectorIdxConstant(0, DL));
10591 
10592     SDValue Mask =
10593         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10594     // Set the vector length to only the number of elements we care about. Note
10595     // that for slideup this includes the offset.
10596     unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10597     SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10598 
10599     // Use tail agnostic policy if we're inserting over Vec's tail.
10600     unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10601     if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10602       Policy = RISCVII::TAIL_AGNOSTIC;
10603 
10604     // If we're inserting into the lowest elements, use a tail undisturbed
10605     // vmv.v.v.
10606     if (OrigIdx == 0) {
10607       SubVec =
10608           DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10609     } else {
10610       SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10611       SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10612                            SlideupAmt, Mask, VL, Policy);
10613     }
10614 
10615     if (VecVT.isFixedLengthVector())
10616       SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10617     return DAG.getBitcast(Op.getValueType(), SubVec);
10618   }
10619 
10620   MVT ContainerVecVT = VecVT;
10621   if (VecVT.isFixedLengthVector()) {
10622     ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10623     Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10624   }
10625 
10626   MVT ContainerSubVecVT = SubVecVT;
10627   if (SubVecVT.isFixedLengthVector()) {
10628     ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10629     SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10630   }
10631 
10632   unsigned SubRegIdx;
10633   ElementCount RemIdx;
10634   // insert_subvector scales the index by vscale if the subvector is scalable,
10635   // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10636   // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10637   if (SubVecVT.isFixedLengthVector()) {
10638     assert(VLen);
10639     unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10640     auto Decompose =
10641         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10642             ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10643     SubRegIdx = Decompose.first;
10644     RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10645                                     (OrigIdx % Vscale));
10646   } else {
10647     auto Decompose =
10648         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10649             ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10650     SubRegIdx = Decompose.first;
10651     RemIdx = ElementCount::getScalable(Decompose.second);
10652   }
10653 
10654   TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10655   assert(isPowerOf2_64(
10656       Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10657   bool ExactlyVecRegSized =
10658       Subtarget.expandVScale(SubVecVT.getSizeInBits())
10659           .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10660 
10661   // 1. If the Idx has been completely eliminated and this subvector's size is
10662   // a vector register or a multiple thereof, or the surrounding elements are
10663   // undef, then this is a subvector insert which naturally aligns to a vector
10664   // register. These can easily be handled using subregister manipulation.
10665   // 2. If the subvector isn't an exact multiple of a valid register group size,
10666   // then the insertion must preserve the undisturbed elements of the register.
10667   // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10668   // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10669   // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10670   // of that LMUL=1 type back into the larger vector (resolving to another
10671   // subregister operation). See below for how our VSLIDEUP works. We go via a
10672   // LMUL=1 type to avoid allocating a large register group to hold our
10673   // subvector.
10674   if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10675     if (SubVecVT.isFixedLengthVector()) {
10676       // We may get NoSubRegister if inserting at index 0 and the subvec
10677       // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10678       if (SubRegIdx == RISCV::NoSubRegister) {
10679         assert(OrigIdx == 0);
10680         return Op;
10681       }
10682 
10683       // Use an insert_subvector that will resolve to an insert subreg.
10684       assert(VLen);
10685       unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10686       SDValue Insert =
10687           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10688                       DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10689       if (VecVT.isFixedLengthVector())
10690         Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10691       return Insert;
10692     }
10693     return Op;
10694   }
10695 
10696   // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10697   // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10698   // (in our case undisturbed). This means we can set up a subvector insertion
10699   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10700   // size of the subvector.
10701   MVT InterSubVT = ContainerVecVT;
10702   SDValue AlignedExtract = Vec;
10703   unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10704   if (SubVecVT.isFixedLengthVector()) {
10705     assert(VLen);
10706     AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10707   }
10708   if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10709     InterSubVT = getLMUL1VT(ContainerVecVT);
10710     // Extract a subvector equal to the nearest full vector register type. This
10711     // should resolve to an EXTRACT_SUBREG instruction.
10712     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10713                                  DAG.getVectorIdxConstant(AlignedIdx, DL));
10714   }
10715 
10716   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10717                        DAG.getUNDEF(InterSubVT), SubVec,
10718                        DAG.getVectorIdxConstant(0, DL));
10719 
10720   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10721 
10722   ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10723   VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10724 
10725   // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10726   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10727   if (Subtarget.expandVScale(EndIndex) ==
10728       Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10729     Policy = RISCVII::TAIL_AGNOSTIC;
10730 
10731   // If we're inserting into the lowest elements, use a tail undisturbed
10732   // vmv.v.v.
10733   if (RemIdx.isZero()) {
10734     SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10735                          SubVec, VL);
10736   } else {
10737     SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10738 
10739     // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10740     VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10741 
10742     SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10743                          SlideupAmt, Mask, VL, Policy);
10744   }
10745 
10746   // If required, insert this subvector back into the correct vector register.
10747   // This should resolve to an INSERT_SUBREG instruction.
10748   if (ContainerVecVT.bitsGT(InterSubVT))
10749     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10750                          DAG.getVectorIdxConstant(AlignedIdx, DL));
10751 
10752   if (VecVT.isFixedLengthVector())
10753     SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10754 
10755   // We might have bitcast from a mask type: cast back to the original type if
10756   // required.
10757   return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10758 }
10759 
10760 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10761                                                     SelectionDAG &DAG) const {
10762   SDValue Vec = Op.getOperand(0);
10763   MVT SubVecVT = Op.getSimpleValueType();
10764   MVT VecVT = Vec.getSimpleValueType();
10765 
10766   SDLoc DL(Op);
10767   MVT XLenVT = Subtarget.getXLenVT();
10768   unsigned OrigIdx = Op.getConstantOperandVal(1);
10769   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10770 
10771   // With an index of 0 this is a cast-like subvector, which can be performed
10772   // with subregister operations.
10773   if (OrigIdx == 0)
10774     return Op;
10775 
10776   // We don't have the ability to slide mask vectors down indexed by their i1
10777   // elements; the smallest we can do is i8. Often we are able to bitcast to
10778   // equivalent i8 vectors. Note that when extracting a fixed-length vector
10779   // from a scalable one, we might not necessarily have enough scalable
10780   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10781   if (SubVecVT.getVectorElementType() == MVT::i1) {
10782     if (VecVT.getVectorMinNumElements() >= 8 &&
10783         SubVecVT.getVectorMinNumElements() >= 8) {
10784       assert(OrigIdx % 8 == 0 && "Invalid index");
10785       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10786              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10787              "Unexpected mask vector lowering");
10788       OrigIdx /= 8;
10789       SubVecVT =
10790           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10791                            SubVecVT.isScalableVector());
10792       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10793                                VecVT.isScalableVector());
10794       Vec = DAG.getBitcast(VecVT, Vec);
10795     } else {
10796       // We can't slide this mask vector down indexed by its i1 elements.
10797       // This poses a problem when we wish to extract a scalable vector which
10798       // can't be re-expressed as a larger type. Just choose the slow path and
10799       // extend to a larger type, then truncate back down.
10800       // TODO: We could probably improve this when extracting certain fixed
10801       // from fixed, where we can extract as i8 and shift the correct element
10802       // right to reach the desired subvector?
10803       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10804       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10805       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10806       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10807                         Op.getOperand(1));
10808       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10809       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10810     }
10811   }
10812 
10813   const auto VLen = Subtarget.getRealVLen();
10814 
10815   // If the subvector is a fixed-length type and we don't know VLEN exactly,
10816   // we cannot use subregister manipulation to simplify the codegen; we don't
10817   // know which register of an LMUL group contains the specific subvector, as
10818   // we only know the minimum register size. Therefore we must slide the
10819   // vector group down the full amount.
10820   if (SubVecVT.isFixedLengthVector() && !VLen) {
10821     MVT ContainerVT = VecVT;
10822     if (VecVT.isFixedLengthVector()) {
10823       ContainerVT = getContainerForFixedLengthVector(VecVT);
10824       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10825     }
10826 
10827     // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10828     unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10829     if (auto ShrunkVT =
10830             getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10831       ContainerVT = *ShrunkVT;
10832       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10833                         DAG.getVectorIdxConstant(0, DL));
10834     }
10835 
10836     SDValue Mask =
10837         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10838     // Set the vector length to only the number of elements we care about. This
10839     // avoids sliding down elements we're going to discard straight away.
10840     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10841     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10842     SDValue Slidedown =
10843         getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10844                       DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10845     // Now we can use a cast-like subvector extract to get the result.
10846     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10847                             DAG.getVectorIdxConstant(0, DL));
10848     return DAG.getBitcast(Op.getValueType(), Slidedown);
10849   }
10850 
10851   if (VecVT.isFixedLengthVector()) {
10852     VecVT = getContainerForFixedLengthVector(VecVT);
10853     Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10854   }
10855 
10856   MVT ContainerSubVecVT = SubVecVT;
10857   if (SubVecVT.isFixedLengthVector())
10858     ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10859 
10860   unsigned SubRegIdx;
10861   ElementCount RemIdx;
10862   // extract_subvector scales the index by vscale if the subvector is scalable,
10863   // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10864   // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10865   if (SubVecVT.isFixedLengthVector()) {
10866     assert(VLen);
10867     unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10868     auto Decompose =
10869         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10870             VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10871     SubRegIdx = Decompose.first;
10872     RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10873                                     (OrigIdx % Vscale));
10874   } else {
10875     auto Decompose =
10876         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10877             VecVT, ContainerSubVecVT, OrigIdx, TRI);
10878     SubRegIdx = Decompose.first;
10879     RemIdx = ElementCount::getScalable(Decompose.second);
10880   }
10881 
10882   // If the Idx has been completely eliminated then this is a subvector extract
10883   // which naturally aligns to a vector register. These can easily be handled
10884   // using subregister manipulation. We use an extract_subvector that will
10885   // resolve to an extract subreg.
10886   if (RemIdx.isZero()) {
10887     if (SubVecVT.isFixedLengthVector()) {
10888       assert(VLen);
10889       unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10890       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10891                         DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10892       return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10893     }
10894     return Op;
10895   }
10896 
10897   // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10898   // was > M1 then the index would need to be a multiple of VLMAX, and so would
10899   // divide exactly.
10900   assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10901          getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10902 
10903   // If the vector type is an LMUL-group type, extract a subvector equal to the
10904   // nearest full vector register type.
10905   MVT InterSubVT = VecVT;
10906   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10907     // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10908     // we should have successfully decomposed the extract into a subregister.
10909     // We use an extract_subvector that will resolve to a subreg extract.
10910     assert(SubRegIdx != RISCV::NoSubRegister);
10911     (void)SubRegIdx;
10912     unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10913     if (SubVecVT.isFixedLengthVector()) {
10914       assert(VLen);
10915       Idx /= *VLen / RISCV::RVVBitsPerBlock;
10916     }
10917     InterSubVT = getLMUL1VT(VecVT);
10918     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10919                       DAG.getConstant(Idx, DL, XLenVT));
10920   }
10921 
10922   // Slide this vector register down by the desired number of elements in order
10923   // to place the desired subvector starting at element 0.
10924   SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10925   auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10926   if (SubVecVT.isFixedLengthVector())
10927     VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10928   SDValue Slidedown =
10929       getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10930                     Vec, SlidedownAmt, Mask, VL);
10931 
10932   // Now that the vector is in the right position, extract our final subvector.
10933   // This should resolve to a COPY.
10934   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10935                           DAG.getVectorIdxConstant(0, DL));
10936 
10937   // We might have bitcast from a mask type: cast back to the original type if
10938   // required.
10939   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10940 }
10941 
10942 // Widen a vector's operands to i8, then truncate its results back to the
10943 // original type, typically i1.  All operand and result types must be the same.
10944 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10945                                   SelectionDAG &DAG) {
10946   MVT VT = N.getSimpleValueType();
10947   MVT WideVT = VT.changeVectorElementType(MVT::i8);
10948   SmallVector<SDValue, 4> WideOps;
10949   for (SDValue Op : N->ops()) {
10950     assert(Op.getSimpleValueType() == VT &&
10951            "Operands and result must be same type");
10952     WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10953   }
10954 
10955   unsigned NumVals = N->getNumValues();
10956 
10957   SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10958       NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10959   SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10960   SmallVector<SDValue, 4> TruncVals;
10961   for (unsigned I = 0; I < NumVals; I++) {
10962     TruncVals.push_back(
10963         DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10964                      DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10965   }
10966 
10967   if (TruncVals.size() > 1)
10968     return DAG.getMergeValues(TruncVals, DL);
10969   return TruncVals.front();
10970 }
10971 
10972 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10973                                                       SelectionDAG &DAG) const {
10974   SDLoc DL(Op);
10975   MVT VecVT = Op.getSimpleValueType();
10976 
10977   assert(VecVT.isScalableVector() &&
10978          "vector_interleave on non-scalable vector!");
10979 
10980   // 1 bit element vectors need to be widened to e8
10981   if (VecVT.getVectorElementType() == MVT::i1)
10982     return widenVectorOpsToi8(Op, DL, DAG);
10983 
10984   // If the VT is LMUL=8, we need to split and reassemble.
10985   if (VecVT.getSizeInBits().getKnownMinValue() ==
10986       (8 * RISCV::RVVBitsPerBlock)) {
10987     auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10988     auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10989     EVT SplitVT = Op0Lo.getValueType();
10990 
10991     SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10992                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10993     SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10994                                 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10995 
10996     SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10997                                ResLo.getValue(0), ResHi.getValue(0));
10998     SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10999                               ResHi.getValue(1));
11000     return DAG.getMergeValues({Even, Odd}, DL);
11001   }
11002 
11003   // Concatenate the two vectors as one vector to deinterleave
11004   MVT ConcatVT =
11005       MVT::getVectorVT(VecVT.getVectorElementType(),
11006                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11007   SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11008                                Op.getOperand(0), Op.getOperand(1));
11009 
11010   // We can deinterleave through vnsrl.wi if the element type is smaller than
11011   // ELEN
11012   if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11013     SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
11014     SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
11015     return DAG.getMergeValues({Even, Odd}, DL);
11016   }
11017 
11018   // For the even and odd masks, use a vmv.v.x of an i8 constant to fill the
11019   // largest possible mask vector, then extract the required subvector.  Doing
11020   // this (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11021   // creation to be rematerialized during register allocation to reduce
11022   // register pressure if needed.
11023 
11024   MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11025 
11026   SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11027   EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11028   SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
11029                                  DAG.getVectorIdxConstant(0, DL));
11030 
11031   SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11032   OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11033   SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11034                                 DAG.getVectorIdxConstant(0, DL));
11035 
11036   // vcompress the even and odd elements into two separate vectors
11037   SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11038                                  EvenMask, DAG.getUNDEF(ConcatVT));
11039   SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11040                                 OddMask, DAG.getUNDEF(ConcatVT));
11041 
11042   // Extract the result half of the compress for even and odd
11043   SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11044                              DAG.getVectorIdxConstant(0, DL));
11045   SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11046                             DAG.getVectorIdxConstant(0, DL));
11047 
11048   return DAG.getMergeValues({Even, Odd}, DL);
11049 }
11050 
11051 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11052                                                     SelectionDAG &DAG) const {
11053   SDLoc DL(Op);
11054   MVT VecVT = Op.getSimpleValueType();
11055 
11056   assert(VecVT.isScalableVector() &&
11057          "vector_interleave on non-scalable vector!");
11058 
11059   // i1 vectors need to be widened to i8
11060   if (VecVT.getVectorElementType() == MVT::i1)
11061     return widenVectorOpsToi8(Op, DL, DAG);
11062 
11063   MVT XLenVT = Subtarget.getXLenVT();
11064   SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11065 
11066   // If the VT is LMUL=8, we need to split and reassemble.
11067   if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11068     auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11069     auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11070     EVT SplitVT = Op0Lo.getValueType();
11071 
11072     SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11073                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11074     SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11075                                 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11076 
11077     SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11078                              ResLo.getValue(0), ResLo.getValue(1));
11079     SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11080                              ResHi.getValue(0), ResHi.getValue(1));
11081     return DAG.getMergeValues({Lo, Hi}, DL);
11082   }
11083 
11084   SDValue Interleaved;
11085 
11086   // If the element type is smaller than ELEN, then we can interleave with
11087   // vwaddu.vv and vwmaccu.vx
11088   if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11089     Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11090                                         DAG, Subtarget);
11091   } else {
11092     // Otherwise, fallback to using vrgathere16.vv
11093     MVT ConcatVT =
11094       MVT::getVectorVT(VecVT.getVectorElementType(),
11095                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11096     SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11097                                  Op.getOperand(0), Op.getOperand(1));
11098 
11099     MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11100 
11101     // 0 1 2 3 4 5 6 7 ...
11102     SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11103 
11104     // 1 1 1 1 1 1 1 1 ...
11105     SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11106 
11107     // 1 0 1 0 1 0 1 0 ...
11108     SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11109     OddMask = DAG.getSetCC(
11110         DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11111         DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11112         ISD::CondCode::SETNE);
11113 
11114     SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11115 
11116     // Build up the index vector for interleaving the concatenated vector
11117     //      0      0      1      1      2      2      3      3 ...
11118     SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11119     //      0      n      1    n+1      2    n+2      3    n+3 ...
11120     Idx =
11121         DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11122 
11123     // Then perform the interleave
11124     //   v[0]   v[n]   v[1] v[n+1]   v[2] v[n+2]   v[3] v[n+3] ...
11125     SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11126     Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11127                               Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11128   }
11129 
11130   // Extract the two halves from the interleaved result
11131   SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11132                            DAG.getVectorIdxConstant(0, DL));
11133   SDValue Hi = DAG.getNode(
11134       ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11135       DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11136 
11137   return DAG.getMergeValues({Lo, Hi}, DL);
11138 }
11139 
11140 // Lower step_vector to the vid instruction. Any non-identity step value must
11141 // be accounted for by manual expansion.
11142 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11143                                               SelectionDAG &DAG) const {
11144   SDLoc DL(Op);
11145   MVT VT = Op.getSimpleValueType();
11146   assert(VT.isScalableVector() && "Expected scalable vector");
11147   MVT XLenVT = Subtarget.getXLenVT();
11148   auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11149   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11150   uint64_t StepValImm = Op.getConstantOperandVal(0);
11151   if (StepValImm != 1) {
11152     if (isPowerOf2_64(StepValImm)) {
11153       SDValue StepVal =
11154           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11155                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11156       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11157     } else {
11158       SDValue StepVal = lowerScalarSplat(
11159           SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11160           VL, VT, DL, DAG, Subtarget);
11161       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11162     }
11163   }
11164   return StepVec;
11165 }
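
// Rough instruction sketch for the lowering above (illustrative only; the
// vsetvli and register choices depend on the requested type):
//   step_vector, step 4:  vid.v v8;  vsll.vi v8, v8, 2
//   step_vector, step 3:  vid.v v8;  li a0, 3;  vmul.vx v8, v8, a0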
11166 
11167 // Implement vector_reverse using vrgather.vv with indices determined by
11168 // subtracting the id of each element from (VLMAX-1). This will convert
11169 // the indices like so:
11170 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11171 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11172 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11173                                                  SelectionDAG &DAG) const {
11174   SDLoc DL(Op);
11175   MVT VecVT = Op.getSimpleValueType();
11176   if (VecVT.getVectorElementType() == MVT::i1) {
11177     MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11178     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11179     SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11180     return DAG.getSetCC(DL, VecVT, Op2,
11181                         DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11182   }
11183 
11184   MVT ContainerVT = VecVT;
11185   SDValue Vec = Op.getOperand(0);
11186   if (VecVT.isFixedLengthVector()) {
11187     ContainerVT = getContainerForFixedLengthVector(VecVT);
11188     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11189   }
11190 
11191   MVT XLenVT = Subtarget.getXLenVT();
11192   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11193 
11194   // On some uarchs vrgather.vv will read from every input register for each
11195   // output register, regardless of the indices. However, to reverse a vector,
11196   // each output register only needs to read from one input register. So
11197   // decompose it into LMUL * M1 vrgather.vvs to get O(LMUL) performance
11198   // instead of O(LMUL^2).
11199   //
11200   // vsetvli a1, zero, e64, m4, ta, ma
11201   // vrgatherei16.vv v12, v8, v16
11202   // ->
11203   // vsetvli a1, zero, e64, m1, ta, ma
11204   // vrgather.vv v15, v8, v16
11205   // vrgather.vv v14, v9, v16
11206   // vrgather.vv v13, v10, v16
11207   // vrgather.vv v12, v11, v16
11208   if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11209       ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11210     auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11211     Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11212     Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11213     SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11214 
11215     // Fixed length vectors might not fit exactly into their container, and so
11216     // leave a gap in the front of the vector after being reversed. Slide this
11217     // away.
11218     //
11219     // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11220     // 0 1 2 3 x x x x <- reverse
11221     // x x x x 0 1 2 3 <- vslidedown.vx
11222     if (VecVT.isFixedLengthVector()) {
11223       SDValue Offset = DAG.getNode(
11224           ISD::SUB, DL, XLenVT,
11225           DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11226           DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11227       Concat =
11228           getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11229                         DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11230       Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11231     }
11232     return Concat;
11233   }
11234 
11235   unsigned EltSize = ContainerVT.getScalarSizeInBits();
11236   unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11237   unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11238   unsigned MaxVLMAX =
11239       VecVT.isFixedLengthVector()
11240           ? VecVT.getVectorNumElements()
11241           : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11242 
11243   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11244   MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11245 
11246   // If this is SEW=8 and VLMAX is potentially more than 256, we need
11247   // to use vrgatherei16.vv.
11248   if (MaxVLMAX > 256 && EltSize == 8) {
11249     // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11250     // Reverse each half, then reassemble them in reverse order.
11251     // NOTE: It's also possible that after splitting, VLMAX no longer
11252     // requires vrgatherei16.vv.
11253     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11254       auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11255       auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11256       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11257       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11258       // Reassemble the low and high pieces reversed.
11259       // FIXME: This is a CONCAT_VECTORS.
11260       SDValue Res =
11261           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11262                       DAG.getVectorIdxConstant(0, DL));
11263       return DAG.getNode(
11264           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11265           DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11266     }
11267 
11268     // Just promote the int type to i16 which will double the LMUL.
11269     IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11270     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11271   }
11272 
11273   // At LMUL > 1, do the index computation in 16 bits to reduce register
11274   // pressure.
11275   if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11276       IntVT.bitsGT(getLMUL1VT(IntVT))) {
11277     assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11278     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11279     IntVT = IntVT.changeVectorElementType(MVT::i16);
11280   }
11281 
11282   // Calculate VLMAX-1 for the desired SEW.
11283   SDValue VLMinus1 = DAG.getNode(
11284       ISD::SUB, DL, XLenVT,
11285       DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11286       DAG.getConstant(1, DL, XLenVT));
11287 
11288   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11289   bool IsRV32E64 =
11290       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11291   SDValue SplatVL;
11292   if (!IsRV32E64)
11293     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11294   else
11295     SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11296                           VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11297 
11298   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11299   SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11300                                 DAG.getUNDEF(IntVT), Mask, VL);
11301 
11302   SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11303                                DAG.getUNDEF(ContainerVT), Mask, VL);
11304   if (VecVT.isFixedLengthVector())
11305     Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11306   return Gather;
11307 }
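
// Worked example for the vrgather path above (a sketch with VLMAX = 8):
//   vid                0 1 2 3 4 5 6 7
//   (VLMAX-1) - vid    7 6 5 4 3 2 1 0
//   vrgather           v7 v6 v5 v4 v3 v2 v1 v0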
11308 
11309 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11310                                                 SelectionDAG &DAG) const {
11311   SDLoc DL(Op);
11312   SDValue V1 = Op.getOperand(0);
11313   SDValue V2 = Op.getOperand(1);
11314   MVT XLenVT = Subtarget.getXLenVT();
11315   MVT VecVT = Op.getSimpleValueType();
11316 
11317   SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11318 
11319   int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11320   SDValue DownOffset, UpOffset;
11321   if (ImmValue >= 0) {
11322     // The operand is a TargetConstant; we need to rebuild it as a regular
11323     // constant.
11324     DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11325     UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11326   } else {
11327     // The operand is a TargetConstant; we need to rebuild it as a regular
11328     // constant rather than negating the original operand.
11329     UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11330     DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11331   }
11332 
11333   SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11334 
11335   SDValue SlideDown =
11336       getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11337                     DownOffset, TrueMask, UpOffset);
11338   return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11339                      TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11340                      RISCVII::TAIL_AGNOSTIC);
11341 }
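
// Illustrative element view of the splice lowering above (a sketch with an
// immediate of 2 and VLMAX = 8):
//   V1:                       a0 a1 a2 a3 a4 a5 a6 a7
//   V2:                       b0 b1 b2 b3 b4 b5 b6 b7
//   vslidedown V1 by 2:       a2 a3 a4 a5 a6 a7  x  x
//   vslideup V2 at VLMAX-2:   a2 a3 a4 a5 a6 a7 b0 b1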
11342 
11343 SDValue
11344 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11345                                                      SelectionDAG &DAG) const {
11346   SDLoc DL(Op);
11347   auto *Load = cast<LoadSDNode>(Op);
11348 
11349   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11350                                         Load->getMemoryVT(),
11351                                         *Load->getMemOperand()) &&
11352          "Expecting a correctly-aligned load");
11353 
11354   MVT VT = Op.getSimpleValueType();
11355   MVT XLenVT = Subtarget.getXLenVT();
11356   MVT ContainerVT = getContainerForFixedLengthVector(VT);
11357 
11358   // If we know the exact VLEN and our fixed length vector completely fills
11359   // the container, use a whole register load instead.
11360   const auto [MinVLMAX, MaxVLMAX] =
11361       RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11362   if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11363       getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11364     MachineMemOperand *MMO = Load->getMemOperand();
11365     SDValue NewLoad =
11366         DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11367                     MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11368                     MMO->getAAInfo(), MMO->getRanges());
11369     SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11370     return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11371   }
11372 
11373   SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11374 
11375   bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11376   SDValue IntID = DAG.getTargetConstant(
11377       IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11378   SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11379   if (!IsMaskOp)
11380     Ops.push_back(DAG.getUNDEF(ContainerVT));
11381   Ops.push_back(Load->getBasePtr());
11382   Ops.push_back(VL);
11383   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11384   SDValue NewLoad =
11385       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11386                               Load->getMemoryVT(), Load->getMemOperand());
11387 
11388   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11389   return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11390 }
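
// Example of the two paths above (a sketch; the exact instructions are up to
// instruction selection): a fixed v4i32 load with a known exact VLEN of 128
// completely fills its m1 container, so an ordinary load of the container
// type is emitted and can become a whole-register load (e.g. vl1re32.v).
// Without exact VLEN knowledge, the AVL is the fixed element count, roughly:
//   vsetivli zero, 4, e32, m1, ta, ma
//   vle32.v  v8, (a0)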
11391 
11392 SDValue
11393 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11394                                                       SelectionDAG &DAG) const {
11395   SDLoc DL(Op);
11396   auto *Store = cast<StoreSDNode>(Op);
11397 
11398   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11399                                         Store->getMemoryVT(),
11400                                         *Store->getMemOperand()) &&
11401          "Expecting a correctly-aligned store");
11402 
11403   SDValue StoreVal = Store->getValue();
11404   MVT VT = StoreVal.getSimpleValueType();
11405   MVT XLenVT = Subtarget.getXLenVT();
11406 
11407   // If the size is less than a byte, we need to pad with zeros to make a byte.
11408   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11409     VT = MVT::v8i1;
11410     StoreVal =
11411         DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11412                     StoreVal, DAG.getVectorIdxConstant(0, DL));
11413   }
11414 
11415   MVT ContainerVT = getContainerForFixedLengthVector(VT);
11416 
11417   SDValue NewValue =
11418       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11419 
11420   // If we know the exact VLEN and our fixed length vector completely fills
11421   // the container, use a whole register store instead.
11422   const auto [MinVLMAX, MaxVLMAX] =
11423       RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11424   if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11425       getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11426     MachineMemOperand *MMO = Store->getMemOperand();
11427     return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11428                         MMO->getPointerInfo(), MMO->getBaseAlign(),
11429                         MMO->getFlags(), MMO->getAAInfo());
11430   }
11431 
11432   SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11433 
11434   bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11435   SDValue IntID = DAG.getTargetConstant(
11436       IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11437   return DAG.getMemIntrinsicNode(
11438       ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11439       {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11440       Store->getMemoryVT(), Store->getMemOperand());
11441 }
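
// Note on the sub-byte mask case above: a fixed v4i1 store, for example, is
// widened to v8i1 by inserting the value into an all-zero vector, so the
// vsm intrinsic always stores a whole number of bytes.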
11442 
11443 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11444                                              SelectionDAG &DAG) const {
11445   SDLoc DL(Op);
11446   MVT VT = Op.getSimpleValueType();
11447 
11448   const auto *MemSD = cast<MemSDNode>(Op);
11449   EVT MemVT = MemSD->getMemoryVT();
11450   MachineMemOperand *MMO = MemSD->getMemOperand();
11451   SDValue Chain = MemSD->getChain();
11452   SDValue BasePtr = MemSD->getBasePtr();
11453 
11454   SDValue Mask, PassThru, VL;
11455   bool IsExpandingLoad = false;
11456   if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11457     Mask = VPLoad->getMask();
11458     PassThru = DAG.getUNDEF(VT);
11459     VL = VPLoad->getVectorLength();
11460   } else {
11461     const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11462     Mask = MLoad->getMask();
11463     PassThru = MLoad->getPassThru();
11464     IsExpandingLoad = MLoad->isExpandingLoad();
11465   }
11466 
11467   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11468 
11469   MVT XLenVT = Subtarget.getXLenVT();
11470 
11471   MVT ContainerVT = VT;
11472   if (VT.isFixedLengthVector()) {
11473     ContainerVT = getContainerForFixedLengthVector(VT);
11474     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11475     if (!IsUnmasked) {
11476       MVT MaskVT = getMaskTypeFor(ContainerVT);
11477       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11478     }
11479   }
11480 
11481   if (!VL)
11482     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11483 
11484   SDValue ExpandingVL;
11485   if (!IsUnmasked && IsExpandingLoad) {
11486     ExpandingVL = VL;
11487     VL =
11488         DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11489                     getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11490   }
11491 
11492   unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11493                                                  : Intrinsic::riscv_vle_mask;
11494   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11495   if (IntID == Intrinsic::riscv_vle)
11496     Ops.push_back(DAG.getUNDEF(ContainerVT));
11497   else
11498     Ops.push_back(PassThru);
11499   Ops.push_back(BasePtr);
11500   if (IntID == Intrinsic::riscv_vle_mask)
11501     Ops.push_back(Mask);
11502   Ops.push_back(VL);
11503   if (IntID == Intrinsic::riscv_vle_mask)
11504     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11505 
11506   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11507 
11508   SDValue Result =
11509       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11510   Chain = Result.getValue(1);
11511   if (ExpandingVL) {
11512     MVT IndexVT = ContainerVT;
11513     if (ContainerVT.isFloatingPoint())
11514       IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11515 
11516     MVT IndexEltVT = IndexVT.getVectorElementType();
11517     bool UseVRGATHEREI16 = false;
11518     // If the index vector is an i8 vector and the element count exceeds 256, we
11519     // should change the element type of the index vector to i16 to avoid
11520     // overflow.
11521     if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11522       // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11523       assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11524       IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11525       UseVRGATHEREI16 = true;
11526     }
11527 
11528     SDValue Iota =
11529         DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11530                     DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11531                     DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11532     Result =
11533         DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11534                                     : RISCVISD::VRGATHER_VV_VL,
11535                     DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11536   }
11537 
11538   if (VT.isFixedLengthVector())
11539     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11540 
11541   return DAG.getMergeValues({Result, Chain}, DL);
11542 }
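
// Illustrative element view of the expanding-load path above (a sketch with
// four elements, mask m = 1 0 1 1, passthru p0..p3, memory x0 x1 x2 ...):
//   vcpop(m)                    -> new VL = 3
//   contiguous vle              -> x0 x1 x2  ?
//   viota(m)                    -> 0  1  1  2
//   vrgather by the iota vector,
//   masked over the passthru    -> x0 p1 x1 x2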
11543 
11544 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11545                                               SelectionDAG &DAG) const {
11546   SDLoc DL(Op);
11547 
11548   const auto *MemSD = cast<MemSDNode>(Op);
11549   EVT MemVT = MemSD->getMemoryVT();
11550   MachineMemOperand *MMO = MemSD->getMemOperand();
11551   SDValue Chain = MemSD->getChain();
11552   SDValue BasePtr = MemSD->getBasePtr();
11553   SDValue Val, Mask, VL;
11554 
11555   bool IsCompressingStore = false;
11556   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11557     Val = VPStore->getValue();
11558     Mask = VPStore->getMask();
11559     VL = VPStore->getVectorLength();
11560   } else {
11561     const auto *MStore = cast<MaskedStoreSDNode>(Op);
11562     Val = MStore->getValue();
11563     Mask = MStore->getMask();
11564     IsCompressingStore = MStore->isCompressingStore();
11565   }
11566 
11567   bool IsUnmasked =
11568       ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11569 
11570   MVT VT = Val.getSimpleValueType();
11571   MVT XLenVT = Subtarget.getXLenVT();
11572 
11573   MVT ContainerVT = VT;
11574   if (VT.isFixedLengthVector()) {
11575     ContainerVT = getContainerForFixedLengthVector(VT);
11576 
11577     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11578     if (!IsUnmasked || IsCompressingStore) {
11579       MVT MaskVT = getMaskTypeFor(ContainerVT);
11580       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11581     }
11582   }
11583 
11584   if (!VL)
11585     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11586 
11587   if (IsCompressingStore) {
11588     Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11589                       DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11590                       DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11591     VL =
11592         DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11593                     getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11594   }
11595 
11596   unsigned IntID =
11597       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11598   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11599   Ops.push_back(Val);
11600   Ops.push_back(BasePtr);
11601   if (!IsUnmasked)
11602     Ops.push_back(Mask);
11603   Ops.push_back(VL);
11604 
11605   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11606                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11607 }
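
// Illustrative element view of the compressing-store path above (a sketch
// with four elements, mask m = 1 0 1 1 and value v0 v1 v2 v3):
//   vcompress   -> v0 v2 v3  ?
//   vcpop(m)    -> new VL = 3
//   vse         -> stores v0 v2 v3 contiguously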
11608 
11609 SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11610                                                  SelectionDAG &DAG) const {
11611   SDLoc DL(Op);
11612   SDValue Val = Op.getOperand(0);
11613   SDValue Mask = Op.getOperand(1);
11614   SDValue Passthru = Op.getOperand(2);
11615 
11616   MVT VT = Val.getSimpleValueType();
11617   MVT XLenVT = Subtarget.getXLenVT();
11618   MVT ContainerVT = VT;
11619   if (VT.isFixedLengthVector()) {
11620     ContainerVT = getContainerForFixedLengthVector(VT);
11621     MVT MaskVT = getMaskTypeFor(ContainerVT);
11622     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11623     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11624     Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11625   }
11626 
11627   SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11628   SDValue Res =
11629       DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11630                   DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11631                   Passthru, Val, Mask, VL);
11632 
11633   if (VT.isFixedLengthVector())
11634     Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11635 
11636   return Res;
11637 }
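
// Worked example for the lowering above (a sketch): vector.compress with
// value v0 v1 v2 v3, mask 1 0 1 1 and passthru p0 p1 p2 p3 packs the active
// elements into the low positions and takes the tail from the passthru:
//   result -> v0 v2 v3 p3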
11638 
11639 SDValue
11640 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11641                                                       SelectionDAG &DAG) const {
11642   MVT InVT = Op.getOperand(0).getSimpleValueType();
11643   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11644 
11645   MVT VT = Op.getSimpleValueType();
11646 
11647   SDValue Op1 =
11648       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11649   SDValue Op2 =
11650       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11651 
11652   SDLoc DL(Op);
11653   auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11654                                     DAG, Subtarget);
11655   MVT MaskVT = getMaskTypeFor(ContainerVT);
11656 
11657   SDValue Cmp =
11658       DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11659                   {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11660 
11661   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11662 }
11663 
11664 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11665                                                      SelectionDAG &DAG) const {
11666   unsigned Opc = Op.getOpcode();
11667   SDLoc DL(Op);
11668   SDValue Chain = Op.getOperand(0);
11669   SDValue Op1 = Op.getOperand(1);
11670   SDValue Op2 = Op.getOperand(2);
11671   SDValue CC = Op.getOperand(3);
11672   ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11673   MVT VT = Op.getSimpleValueType();
11674   MVT InVT = Op1.getSimpleValueType();
11675 
11676   // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11677   // condition codes.
11678   if (Opc == ISD::STRICT_FSETCCS) {
11679     // Expand strict_fsetccs(x, y, oeq) to
11680     // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11681     SDVTList VTList = Op->getVTList();
11682     if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11683       SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11684       SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11685                                  Op2, OLECCVal);
11686       SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11687                                  Op1, OLECCVal);
11688       SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11689                                      Tmp1.getValue(1), Tmp2.getValue(1));
11690       // Tmp1 and Tmp2 might be the same node.
11691       if (Tmp1 != Tmp2)
11692         Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11693       return DAG.getMergeValues({Tmp1, OutChain}, DL);
11694     }
11695 
11696     // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11697     if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11698       SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11699       SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11700                                 Op2, OEQCCVal);
11701       SDValue Res = DAG.getNOT(DL, OEQ, VT);
11702       return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11703     }
11704   }
11705 
11706   MVT ContainerInVT = InVT;
11707   if (InVT.isFixedLengthVector()) {
11708     ContainerInVT = getContainerForFixedLengthVector(InVT);
11709     Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11710     Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11711   }
11712   MVT MaskVT = getMaskTypeFor(ContainerInVT);
11713 
11714   auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11715 
11716   SDValue Res;
11717   if (Opc == ISD::STRICT_FSETCC &&
11718       (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11719        CCVal == ISD::SETOLE)) {
11720     // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11721     // is only active when both input elements are ordered.
11722     SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11723     SDValue OrderMask1 = DAG.getNode(
11724         RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11725         {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11726          True, VL});
11727     SDValue OrderMask2 = DAG.getNode(
11728         RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11729         {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11730          True, VL});
11731     Mask =
11732         DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11733     // Use Mask as the passthru operand to let the result be 0 if either of the
11734     // inputs is unordered.
11735     Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11736                       DAG.getVTList(MaskVT, MVT::Other),
11737                       {Chain, Op1, Op2, CC, Mask, Mask, VL});
11738   } else {
11739     unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11740                                                 : RISCVISD::STRICT_FSETCCS_VL;
11741     Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11742                       {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11743   }
11744 
11745   if (VT.isFixedLengthVector()) {
11746     SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11747     return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11748   }
11749   return Res;
11750 }
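
// Worked example for the quiet-compare path above (a sketch): for a
// STRICT_FSETCC with SETOLT, Op1 = {1.0, qNaN} and Op2 = {2.0, 2.0}, the
// order mask is {1, 0}, so the masked compare only runs on the ordered lane
// and no invalid-operation exception is raised for the qNaN lane. Because
// the order mask is also the passthru, the unordered lane yields 0:
//   result -> {1, 0}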
11751 
11752 // Lower vector ABS to smax(X, sub(0, X)).
11753 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11754   SDLoc DL(Op);
11755   MVT VT = Op.getSimpleValueType();
11756   SDValue X = Op.getOperand(0);
11757 
11758   assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11759          "Unexpected type for ISD::ABS");
11760 
11761   MVT ContainerVT = VT;
11762   if (VT.isFixedLengthVector()) {
11763     ContainerVT = getContainerForFixedLengthVector(VT);
11764     X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11765   }
11766 
11767   SDValue Mask, VL;
11768   if (Op->getOpcode() == ISD::VP_ABS) {
11769     Mask = Op->getOperand(1);
11770     if (VT.isFixedLengthVector())
11771       Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11772                                      Subtarget);
11773     VL = Op->getOperand(2);
11774   } else
11775     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11776 
11777   SDValue SplatZero = DAG.getNode(
11778       RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11779       DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11780   SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11781                              DAG.getUNDEF(ContainerVT), Mask, VL);
11782   SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11783                             DAG.getUNDEF(ContainerVT), Mask, VL);
11784 
11785   if (VT.isFixedLengthVector())
11786     Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11787   return Max;
11788 }
11789 
11790 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11791     SDValue Op, SelectionDAG &DAG) const {
11792   SDLoc DL(Op);
11793   MVT VT = Op.getSimpleValueType();
11794   SDValue Mag = Op.getOperand(0);
11795   SDValue Sign = Op.getOperand(1);
11796   assert(Mag.getValueType() == Sign.getValueType() &&
11797          "Can only handle COPYSIGN with matching types.");
11798 
11799   MVT ContainerVT = getContainerForFixedLengthVector(VT);
11800   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11801   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11802 
11803   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11804 
11805   SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11806                                  Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11807 
11808   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11809 }
11810 
11811 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11812     SDValue Op, SelectionDAG &DAG) const {
11813   MVT VT = Op.getSimpleValueType();
11814   MVT ContainerVT = getContainerForFixedLengthVector(VT);
11815 
11816   MVT I1ContainerVT =
11817       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11818 
11819   SDValue CC =
11820       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11821   SDValue Op1 =
11822       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11823   SDValue Op2 =
11824       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11825 
11826   SDLoc DL(Op);
11827   SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11828 
11829   SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11830                                Op2, DAG.getUNDEF(ContainerVT), VL);
11831 
11832   return convertFromScalableVector(VT, Select, DAG, Subtarget);
11833 }
11834 
11835 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11836                                                SelectionDAG &DAG) const {
11837   unsigned NewOpc = getRISCVVLOp(Op);
11838   bool HasPassthruOp = hasPassthruOp(NewOpc);
11839   bool HasMask = hasMaskOp(NewOpc);
11840 
11841   MVT VT = Op.getSimpleValueType();
11842   MVT ContainerVT = getContainerForFixedLengthVector(VT);
11843 
11844   // Create list of operands by converting existing ones to scalable types.
11845   SmallVector<SDValue, 6> Ops;
11846   for (const SDValue &V : Op->op_values()) {
11847     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11848 
11849     // Pass through non-vector operands.
11850     if (!V.getValueType().isVector()) {
11851       Ops.push_back(V);
11852       continue;
11853     }
11854 
11855     // "cast" fixed length vector to a scalable vector.
11856     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11857            "Only fixed length vectors are supported!");
11858     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11859   }
11860 
11861   SDLoc DL(Op);
11862   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11863   if (HasPassthruOp)
11864     Ops.push_back(DAG.getUNDEF(ContainerVT));
11865   if (HasMask)
11866     Ops.push_back(Mask);
11867   Ops.push_back(VL);
11868 
11869   // StrictFP operations have two result values. Their lowered result should
11870   // have the same result count.
11871   if (Op->isStrictFPOpcode()) {
11872     SDValue ScalableRes =
11873         DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11874                     Op->getFlags());
11875     SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11876     return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11877   }
11878 
11879   SDValue ScalableRes =
11880       DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11881   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11882 }
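
// Example of the rewrite above (a sketch; opcode and container names assume
// the usual VL-op mapping): a fixed-length ISD::ADD on v4i32 becomes roughly
//   ADD_VL(convert(a), convert(b), undef passthru, all-ones mask, VL = 4)
// on its scalable container, and the result is converted back to v4i32.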
11883 
11884 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11885 // * Operands of each node are assumed to be in the same order.
11886 // * The EVL operand is promoted from i32 to i64 on RV64.
11887 // * Fixed-length vectors are converted to their scalable-vector container
11888 //   types.
11889 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11890   unsigned RISCVISDOpc = getRISCVVLOp(Op);
11891   bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11892 
11893   SDLoc DL(Op);
11894   MVT VT = Op.getSimpleValueType();
11895   SmallVector<SDValue, 4> Ops;
11896 
11897   MVT ContainerVT = VT;
11898   if (VT.isFixedLengthVector())
11899     ContainerVT = getContainerForFixedLengthVector(VT);
11900 
11901   for (const auto &OpIdx : enumerate(Op->ops())) {
11902     SDValue V = OpIdx.value();
11903     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11904     // Add a dummy passthru value before the mask, or before the EVL if there
11905     // isn't a mask.
11906     if (HasPassthruOp) {
11907       auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11908       if (MaskIdx) {
11909         if (*MaskIdx == OpIdx.index())
11910           Ops.push_back(DAG.getUNDEF(ContainerVT));
11911       } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11912                  OpIdx.index()) {
11913         if (Op.getOpcode() == ISD::VP_MERGE) {
11914           // For VP_MERGE, copy the false operand instead of an undef value.
11915           Ops.push_back(Ops.back());
11916         } else {
11917           assert(Op.getOpcode() == ISD::VP_SELECT);
11918           // For VP_SELECT, add an undef value.
11919           Ops.push_back(DAG.getUNDEF(ContainerVT));
11920         }
11921       }
11922     }
11923     // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11924     if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11925         ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11926       Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11927                                           Subtarget.getXLenVT()));
11928     // Pass through operands which aren't fixed-length vectors.
11929     if (!V.getValueType().isFixedLengthVector()) {
11930       Ops.push_back(V);
11931       continue;
11932     }
11933     // "cast" fixed length vector to a scalable vector.
11934     MVT OpVT = V.getSimpleValueType();
11935     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11936     assert(useRVVForFixedLengthVectorVT(OpVT) &&
11937            "Only fixed length vectors are supported!");
11938     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11939   }
11940 
11941   if (!VT.isFixedLengthVector())
11942     return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11943 
11944   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11945 
11946   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11947 }
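
// Example of the VP rewrite above (a sketch): vp.add(a, b, mask, evl) on a
// fixed v4i32 becomes roughly
//   ADD_VL(convert(a), convert(b), undef passthru, convert(mask), evl)
// on the scalable container. For VP_MERGE the inserted passthru is a copy of
// the false operand rather than undef, as handled above.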
11948 
11949 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11950                                               SelectionDAG &DAG) const {
11951   SDLoc DL(Op);
11952   MVT VT = Op.getSimpleValueType();
11953 
11954   SDValue Src = Op.getOperand(0);
11955   // NOTE: Mask is dropped.
11956   SDValue VL = Op.getOperand(2);
11957 
11958   MVT ContainerVT = VT;
11959   if (VT.isFixedLengthVector()) {
11960     ContainerVT = getContainerForFixedLengthVector(VT);
11961     MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11962     Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11963   }
11964 
11965   MVT XLenVT = Subtarget.getXLenVT();
11966   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11967   SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11968                                   DAG.getUNDEF(ContainerVT), Zero, VL);
11969 
11970   SDValue SplatValue = DAG.getSignedConstant(
11971       Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11972   SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11973                               DAG.getUNDEF(ContainerVT), SplatValue, VL);
11974 
11975   SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11976                                ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11977   if (!VT.isFixedLengthVector())
11978     return Result;
11979   return convertFromScalableVector(VT, Result, DAG, Subtarget);
11980 }
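
// Worked example for the lowering above (a sketch): for a source mask
// 1 0 1 0, vp.zext to i32 selects between splat(1) and splat(0) and yields
// 1 0 1 0, while vp.sext selects splat(-1) instead and yields -1 0 -1 0.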
11981 
11982 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11983                                                 SelectionDAG &DAG) const {
11984   SDLoc DL(Op);
11985   MVT VT = Op.getSimpleValueType();
11986 
11987   SDValue Op1 = Op.getOperand(0);
11988   SDValue Op2 = Op.getOperand(1);
11989   ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11990   // NOTE: Mask is dropped.
11991   SDValue VL = Op.getOperand(4);
11992 
11993   MVT ContainerVT = VT;
11994   if (VT.isFixedLengthVector()) {
11995     ContainerVT = getContainerForFixedLengthVector(VT);
11996     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11997     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11998   }
11999 
12000   SDValue Result;
12001   SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12002 
12003   switch (Condition) {
12004   default:
12005     break;
12006   // X != Y  --> (X^Y)
12007   case ISD::SETNE:
12008     Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12009     break;
12010   // X == Y  --> ~(X^Y)
12011   case ISD::SETEQ: {
12012     SDValue Temp =
12013         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12014     Result =
12015         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
12016     break;
12017   }
12018   // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
12019   // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
12020   case ISD::SETGT:
12021   case ISD::SETULT: {
12022     SDValue Temp =
12023         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12024     Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12025     break;
12026   }
12027   // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
12028   // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
12029   case ISD::SETLT:
12030   case ISD::SETUGT: {
12031     SDValue Temp =
12032         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12033     Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12034     break;
12035   }
12036   // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
12037   // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
12038   case ISD::SETGE:
12039   case ISD::SETULE: {
12040     SDValue Temp =
12041         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12042     Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12043     break;
12044   }
12045   // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
12046   // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
12047   case ISD::SETLE:
12048   case ISD::SETUGE: {
12049     SDValue Temp =
12050         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12051     Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12052     break;
12053   }
12054   }
12055 
12056   if (!VT.isFixedLengthVector())
12057     return Result;
12058   return convertFromScalableVector(VT, Result, DAG, Subtarget);
12059 }
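
// Sanity-check example for the identities above (a sketch): for i1 operands,
// SETULT holds only for X = 0, Y = 1, which matches ~X & Y:
//   X = 1 0 0 1, Y = 1 1 0 0  ->  ~X & Y = 0 1 0 0
// realized above as a VMXOR with the all-ones mask followed by a VMAND.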
12060 
12061 // Lower Floating-Point/Integer Type-Convert VP SDNodes
12062 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12063                                                 SelectionDAG &DAG) const {
12064   SDLoc DL(Op);
12065 
12066   SDValue Src = Op.getOperand(0);
12067   SDValue Mask = Op.getOperand(1);
12068   SDValue VL = Op.getOperand(2);
12069   unsigned RISCVISDOpc = getRISCVVLOp(Op);
12070 
12071   MVT DstVT = Op.getSimpleValueType();
12072   MVT SrcVT = Src.getSimpleValueType();
12073   if (DstVT.isFixedLengthVector()) {
12074     DstVT = getContainerForFixedLengthVector(DstVT);
12075     SrcVT = getContainerForFixedLengthVector(SrcVT);
12076     Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12077     MVT MaskVT = getMaskTypeFor(DstVT);
12078     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12079   }
12080 
12081   unsigned DstEltSize = DstVT.getScalarSizeInBits();
12082   unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12083 
12084   SDValue Result;
12085   if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12086     if (SrcVT.isInteger()) {
12087       assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12088 
12089       unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12090                                     ? RISCVISD::VSEXT_VL
12091                                     : RISCVISD::VZEXT_VL;
12092 
12093       // Do we need to do any pre-widening before converting?
12094       if (SrcEltSize == 1) {
12095         MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12096         MVT XLenVT = Subtarget.getXLenVT();
12097         SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12098         SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12099                                         DAG.getUNDEF(IntVT), Zero, VL);
12100         SDValue One = DAG.getSignedConstant(
12101             RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12102         SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12103                                        DAG.getUNDEF(IntVT), One, VL);
12104         Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12105                           ZeroSplat, DAG.getUNDEF(IntVT), VL);
12106       } else if (DstEltSize > (2 * SrcEltSize)) {
12107         // Widen before converting.
12108         MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12109                                      DstVT.getVectorElementCount());
12110         Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12111       }
12112 
12113       Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12114     } else {
12115       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12116              "Wrong input/output vector types");
12117 
12118       // Convert f16 to f32 then convert f32 to i64.
12119       if (DstEltSize > (2 * SrcEltSize)) {
12120         assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12121         MVT InterimFVT =
12122             MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12123         Src =
12124             DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12125       }
12126 
12127       Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12128     }
12129   } else { // Narrowing + Conversion
12130     if (SrcVT.isInteger()) {
12131       assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12132       // First do a narrowing conversion to an FP type half the size, then
12133       // round to a smaller FP type if needed.
12134 
12135       MVT InterimFVT = DstVT;
12136       if (SrcEltSize > (2 * DstEltSize)) {
12137         assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12138         assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12139         InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12140       }
12141 
12142       Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12143 
12144       if (InterimFVT != DstVT) {
12145         Src = Result;
12146         Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12147       }
12148     } else {
12149       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12150              "Wrong input/output vector types");
12151       // First do a narrowing conversion to an integer half the size, then
12152       // truncate if needed.
12153 
12154       if (DstEltSize == 1) {
12155         // First convert to the same size integer, then convert to mask using
12156         // setcc.
12157         assert(SrcEltSize >= 16 && "Unexpected FP type!");
12158         MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12159                                           DstVT.getVectorElementCount());
12160         Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12161 
12162         // Compare the integer result to 0. The integer should be 0 or 1/-1;
12163         // otherwise the conversion was undefined.
12164         MVT XLenVT = Subtarget.getXLenVT();
12165         SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12166         SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12167                                 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12168         Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12169                              {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12170                               DAG.getUNDEF(DstVT), Mask, VL});
12171       } else {
12172         MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12173                                           DstVT.getVectorElementCount());
12174 
12175         Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12176 
12177         while (InterimIVT != DstVT) {
12178           SrcEltSize /= 2;
12179           Src = Result;
12180           InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12181                                         DstVT.getVectorElementCount());
12182           Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12183                                Src, Mask, VL);
12184         }
12185       }
12186     }
12187   }
12188 
12189   MVT VT = Op.getSimpleValueType();
12190   if (!VT.isFixedLengthVector())
12191     return Result;
12192   return convertFromScalableVector(VT, Result, DAG, Subtarget);
12193 }
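
// Example of the narrowing path above (a sketch): converting f64 elements to
// i8 elements first narrows with the converting node to i32, then truncates
// twice (i32 -> i16 -> i8) via TRUNCATE_VECTOR_VL, halving the element width
// at each step.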
12194 
12195 SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12196                                               SelectionDAG &DAG) const {
12197   SDLoc DL(Op);
12198   MVT VT = Op.getSimpleValueType();
12199   MVT XLenVT = Subtarget.getXLenVT();
12200 
12201   SDValue Mask = Op.getOperand(0);
12202   SDValue TrueVal = Op.getOperand(1);
12203   SDValue FalseVal = Op.getOperand(2);
12204   SDValue VL = Op.getOperand(3);
12205 
12206   // Use default legalization if a vector of EVL type would be legal.
12207   EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12208                                   VT.getVectorElementCount());
12209   if (isTypeLegal(EVLVecVT))
12210     return SDValue();
12211 
12212   MVT ContainerVT = VT;
12213   if (VT.isFixedLengthVector()) {
12214     ContainerVT = getContainerForFixedLengthVector(VT);
12215     Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12216     TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12217     FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12218   }
12219 
12220   // Promote to a vector of i8.
12221   MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12222 
12223   // Promote TrueVal and FalseVal using VLMax.
12224   // FIXME: Is there a better way to do this?
12225   SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12226   SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12227                                  DAG.getUNDEF(PromotedVT),
12228                                  DAG.getConstant(1, DL, XLenVT), VLMax);
12229   SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12230                                   DAG.getUNDEF(PromotedVT),
12231                                   DAG.getConstant(0, DL, XLenVT), VLMax);
12232   TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12233                         SplatZero, DAG.getUNDEF(PromotedVT), VL);
12234   // Any element past VL uses FalseVal, so use VLMax
12235   FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12236                          SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12237 
12238   // VP_MERGE the two promoted values.
12239   SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12240                                 TrueVal, FalseVal, FalseVal, VL);
12241 
12242   // Convert back to mask.
12243   SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12244   SDValue Result = DAG.getNode(
12245       RISCVISD::SETCC_VL, DL, ContainerVT,
12246       {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12247        DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12248 
12249   if (VT.isFixedLengthVector())
12250     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12251   return Result;
12252 }
12253 
12254 SDValue
12255 RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12256                                                SelectionDAG &DAG) const {
12257   SDLoc DL(Op);
12258 
12259   SDValue Op1 = Op.getOperand(0);
12260   SDValue Op2 = Op.getOperand(1);
12261   SDValue Offset = Op.getOperand(2);
12262   SDValue Mask = Op.getOperand(3);
12263   SDValue EVL1 = Op.getOperand(4);
12264   SDValue EVL2 = Op.getOperand(5);
12265 
12266   const MVT XLenVT = Subtarget.getXLenVT();
12267   MVT VT = Op.getSimpleValueType();
12268   MVT ContainerVT = VT;
12269   if (VT.isFixedLengthVector()) {
12270     ContainerVT = getContainerForFixedLengthVector(VT);
12271     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12272     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12273     MVT MaskVT = getMaskTypeFor(ContainerVT);
12274     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12275   }
12276 
12277   bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12278   if (IsMaskVector) {
12279     ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12280 
12281     // Expand input operands
12282     SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12283                                       DAG.getUNDEF(ContainerVT),
12284                                       DAG.getConstant(1, DL, XLenVT), EVL1);
12285     SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12286                                        DAG.getUNDEF(ContainerVT),
12287                                        DAG.getConstant(0, DL, XLenVT), EVL1);
12288     Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12289                       SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12290 
12291     SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12292                                       DAG.getUNDEF(ContainerVT),
12293                                       DAG.getConstant(1, DL, XLenVT), EVL2);
12294     SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12295                                        DAG.getUNDEF(ContainerVT),
12296                                        DAG.getConstant(0, DL, XLenVT), EVL2);
12297     Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12298                       SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12299   }
12300 
12301   int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12302   SDValue DownOffset, UpOffset;
12303   if (ImmValue >= 0) {
12304     // The operand is a TargetConstant; we need to rebuild it as a regular
12305     // constant.
12306     DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12307     UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12308   } else {
12309     // The operand is a TargetConstant; we need to rebuild it as a regular
12310     // constant rather than negating the original operand.
12311     UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12312     DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12313   }
12314 
12315   SDValue SlideDown =
12316       getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12317                     Op1, DownOffset, Mask, UpOffset);
12318   SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12319                                UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12320 
12321   if (IsMaskVector) {
12322     // Truncate Result back to a mask vector (Result has same EVL as Op2)
12323     Result = DAG.getNode(
12324         RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12325         {Result, DAG.getConstant(0, DL, ContainerVT),
12326          DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12327          Mask, EVL2});
12328   }
12329 
12330   if (!VT.isFixedLengthVector())
12331     return Result;
12332   return convertFromScalableVector(VT, Result, DAG, Subtarget);
12333 }
12334 
12335 SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12336                                                       SelectionDAG &DAG) const {
12337   SDLoc DL(Op);
12338   SDValue Val = Op.getOperand(0);
12339   SDValue Mask = Op.getOperand(1);
12340   SDValue VL = Op.getOperand(2);
12341   MVT VT = Op.getSimpleValueType();
12342 
12343   MVT ContainerVT = VT;
12344   if (VT.isFixedLengthVector()) {
12345     ContainerVT = getContainerForFixedLengthVector(VT);
12346     MVT MaskVT = getMaskTypeFor(ContainerVT);
12347     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12348   }
12349 
12350   SDValue Result =
12351       lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12352 
12353   if (!VT.isFixedLengthVector())
12354     return Result;
12355   return convertFromScalableVector(VT, Result, DAG, Subtarget);
12356 }
12357 
12358 SDValue
12359 RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12360                                                 SelectionDAG &DAG) const {
12361   SDLoc DL(Op);
12362   MVT VT = Op.getSimpleValueType();
12363   MVT XLenVT = Subtarget.getXLenVT();
12364 
12365   SDValue Op1 = Op.getOperand(0);
12366   SDValue Mask = Op.getOperand(1);
12367   SDValue EVL = Op.getOperand(2);
12368 
12369   MVT ContainerVT = VT;
12370   if (VT.isFixedLengthVector()) {
12371     ContainerVT = getContainerForFixedLengthVector(VT);
12372     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12373     MVT MaskVT = getMaskTypeFor(ContainerVT);
12374     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12375   }
12376 
12377   MVT GatherVT = ContainerVT;
12378   MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12379   // Check if we are working with mask vectors
12380   bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12381   if (IsMaskVector) {
12382     GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12383 
12384     // Expand input operand
12385     SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12386                                    DAG.getUNDEF(IndicesVT),
12387                                    DAG.getConstant(1, DL, XLenVT), EVL);
12388     SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12389                                     DAG.getUNDEF(IndicesVT),
12390                                     DAG.getConstant(0, DL, XLenVT), EVL);
12391     Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12392                       SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12393   }
12394 
12395   unsigned EltSize = GatherVT.getScalarSizeInBits();
12396   unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12397   unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12398   unsigned MaxVLMAX =
12399       RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12400 
12401   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12402   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12403   // to use vrgatherei16.vv.
12404   // TODO: It's also possible to use vrgatherei16.vv for other types to
12405   // decrease register width for the index calculation.
12406   // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12407   if (MaxVLMAX > 256 && EltSize == 8) {
12408     // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12409     // Split the vector in half and reverse each half using a full register
12410     // reverse.
12411     // Swap the halves and concatenate them.
12412     // Slide the concatenated result by (VLMax - VL).
12413     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12414       auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12415       auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12416 
12417       SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12418       SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12419 
12420       // Reassemble the low and high pieces reversed.
12421       // NOTE: this Result is unmasked (because we do not need masks for
12422       // shuffles). If in the future this has to change, we can use a SELECT_VL
12423       // between Result and UNDEF using the mask originally passed to VP_REVERSE
12424       SDValue Result =
12425           DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12426 
12427       // Slide off any elements from past EVL that were reversed into the low
12428       // elements.
12429       unsigned MinElts = GatherVT.getVectorMinNumElements();
12430       SDValue VLMax =
12431           DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12432       SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12433 
12434       Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12435                              DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12436 
12437       if (IsMaskVector) {
12438         // Truncate Result back to a mask vector
12439         Result =
12440             DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12441                         {Result, DAG.getConstant(0, DL, GatherVT),
12442                          DAG.getCondCode(ISD::SETNE),
12443                          DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12444       }
12445 
12446       if (!VT.isFixedLengthVector())
12447         return Result;
12448       return convertFromScalableVector(VT, Result, DAG, Subtarget);
12449     }
12450 
12451     // Just promote the int type to i16 which will double the LMUL.
12452     IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12453     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12454   }
12455 
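        // Build the gather indices as (EVL - 1) - vid so that lane i of the
        // result reads element EVL - 1 - i of the source, reversing the first
        // EVL elements.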
12456   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12457   SDValue VecLen =
12458       DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12459   SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12460                                     DAG.getUNDEF(IndicesVT), VecLen, EVL);
12461   SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12462                               DAG.getUNDEF(IndicesVT), Mask, EVL);
12463   SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12464                                DAG.getUNDEF(GatherVT), Mask, EVL);
12465 
12466   if (IsMaskVector) {
12467     // Truncate Result back to a mask vector
12468     Result = DAG.getNode(
12469         RISCVISD::SETCC_VL, DL, ContainerVT,
12470         {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12471          DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12472   }
12473 
12474   if (!VT.isFixedLengthVector())
12475     return Result;
12476   return convertFromScalableVector(VT, Result, DAG, Subtarget);
12477 }
12478 
12479 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12480                                             SelectionDAG &DAG) const {
12481   MVT VT = Op.getSimpleValueType();
12482   if (VT.getVectorElementType() != MVT::i1)
12483     return lowerVPOp(Op, DAG);
12484 
12485   // It is safe to drop the mask parameter as masked-off elements are undef.
12486   SDValue Op1 = Op->getOperand(0);
12487   SDValue Op2 = Op->getOperand(1);
12488   SDValue VL = Op->getOperand(3);
12489 
12490   MVT ContainerVT = VT;
12491   const bool IsFixed = VT.isFixedLengthVector();
12492   if (IsFixed) {
12493     ContainerVT = getContainerForFixedLengthVector(VT);
12494     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12495     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12496   }
12497 
12498   SDLoc DL(Op);
12499   SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12500   if (!IsFixed)
12501     return Val;
12502   return convertFromScalableVector(VT, Val, DAG, Subtarget);
12503 }
12504 
12505 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12506                                                 SelectionDAG &DAG) const {
12507   SDLoc DL(Op);
12508   MVT XLenVT = Subtarget.getXLenVT();
12509   MVT VT = Op.getSimpleValueType();
12510   MVT ContainerVT = VT;
12511   if (VT.isFixedLengthVector())
12512     ContainerVT = getContainerForFixedLengthVector(VT);
12513 
12514   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12515 
12516   auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12517   // Check if the mask is known to be all ones
12518   SDValue Mask = VPNode->getMask();
12519   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12520 
12521   SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12522                                                    : Intrinsic::riscv_vlse_mask,
12523                                         DL, XLenVT);
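        // Operand order for vlse/vlse_mask: chain, intrinsic ID, passthru, base
        // pointer, stride, then an optional mask, the VL, and (masked form only)
        // the tail policy.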
12524   SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12525                               DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12526                               VPNode->getStride()};
12527   if (!IsUnmasked) {
12528     if (VT.isFixedLengthVector()) {
12529       MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12530       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12531     }
12532     Ops.push_back(Mask);
12533   }
12534   Ops.push_back(VPNode->getVectorLength());
12535   if (!IsUnmasked) {
12536     SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12537     Ops.push_back(Policy);
12538   }
12539 
12540   SDValue Result =
12541       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12542                               VPNode->getMemoryVT(), VPNode->getMemOperand());
12543   SDValue Chain = Result.getValue(1);
12544 
12545   if (VT.isFixedLengthVector())
12546     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12547 
12548   return DAG.getMergeValues({Result, Chain}, DL);
12549 }
12550 
12551 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12552                                                  SelectionDAG &DAG) const {
12553   SDLoc DL(Op);
12554   MVT XLenVT = Subtarget.getXLenVT();
12555 
12556   auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12557   SDValue StoreVal = VPNode->getValue();
12558   MVT VT = StoreVal.getSimpleValueType();
12559   MVT ContainerVT = VT;
12560   if (VT.isFixedLengthVector()) {
12561     ContainerVT = getContainerForFixedLengthVector(VT);
12562     StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12563   }
12564 
12565   // Check if the mask is known to be all ones
12566   SDValue Mask = VPNode->getMask();
12567   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12568 
12569   SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12570                                                    : Intrinsic::riscv_vsse_mask,
12571                                         DL, XLenVT);
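        // Operand order for vsse/vsse_mask: chain, intrinsic ID, store value,
        // base pointer, stride, an optional mask, and the VL.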
12572   SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12573                               VPNode->getBasePtr(), VPNode->getStride()};
12574   if (!IsUnmasked) {
12575     if (VT.isFixedLengthVector()) {
12576       MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12577       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12578     }
12579     Ops.push_back(Mask);
12580   }
12581   Ops.push_back(VPNode->getVectorLength());
12582 
12583   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12584                                  Ops, VPNode->getMemoryVT(),
12585                                  VPNode->getMemOperand());
12586 }
12587 
12588 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12589 // matched to an RVV indexed load. The RVV indexed load instructions only
12590 // support the "unsigned unscaled" addressing mode; indices are implicitly
12591 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
12592 // signed or scaled indexing is extended to the XLEN value type and scaled
12593 // accordingly.
12594 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12595                                                SelectionDAG &DAG) const {
12596   SDLoc DL(Op);
12597   MVT VT = Op.getSimpleValueType();
12598 
12599   const auto *MemSD = cast<MemSDNode>(Op.getNode());
12600   EVT MemVT = MemSD->getMemoryVT();
12601   MachineMemOperand *MMO = MemSD->getMemOperand();
12602   SDValue Chain = MemSD->getChain();
12603   SDValue BasePtr = MemSD->getBasePtr();
12604 
12605   [[maybe_unused]] ISD::LoadExtType LoadExtType;
12606   SDValue Index, Mask, PassThru, VL;
12607 
12608   if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12609     Index = VPGN->getIndex();
12610     Mask = VPGN->getMask();
12611     PassThru = DAG.getUNDEF(VT);
12612     VL = VPGN->getVectorLength();
12613     // VP doesn't support extending loads.
12614     LoadExtType = ISD::NON_EXTLOAD;
12615   } else {
12616     // Else it must be a MGATHER.
12617     auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12618     Index = MGN->getIndex();
12619     Mask = MGN->getMask();
12620     PassThru = MGN->getPassThru();
12621     LoadExtType = MGN->getExtensionType();
12622   }
12623 
12624   MVT IndexVT = Index.getSimpleValueType();
12625   MVT XLenVT = Subtarget.getXLenVT();
12626 
12627   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12628          "Unexpected VTs!");
12629   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12630   // Targets have to explicitly opt in to extending vector loads.
12631   assert(LoadExtType == ISD::NON_EXTLOAD &&
12632          "Unexpected extending MGATHER/VP_GATHER");
12633 
12634   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12635   // the selection of the masked intrinsics doesn't do this for us.
12636   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12637 
12638   MVT ContainerVT = VT;
12639   if (VT.isFixedLengthVector()) {
12640     ContainerVT = getContainerForFixedLengthVector(VT);
12641     IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12642                                ContainerVT.getVectorElementCount());
12643 
12644     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12645 
12646     if (!IsUnmasked) {
12647       MVT MaskVT = getMaskTypeFor(ContainerVT);
12648       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12649       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12650     }
12651   }
12652 
12653   if (!VL)
12654     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12655 
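        // Only the low XLEN bits of an index contribute to the XLEN-bit address
        // calculation, so on RV32 any i64 indices can be truncated to i32.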
12656   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12657     IndexVT = IndexVT.changeVectorElementType(XLenVT);
12658     Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12659   }
12660 
12661   unsigned IntID =
12662       IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
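        // Assemble the vluxei/vluxei_mask operands: chain, intrinsic ID,
        // passthru (or undef), base pointer, index vector, an optional mask,
        // the VL, and (masked form only) the tail policy.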
12663   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12664   if (IsUnmasked)
12665     Ops.push_back(DAG.getUNDEF(ContainerVT));
12666   else
12667     Ops.push_back(PassThru);
12668   Ops.push_back(BasePtr);
12669   Ops.push_back(Index);
12670   if (!IsUnmasked)
12671     Ops.push_back(Mask);
12672   Ops.push_back(VL);
12673   if (!IsUnmasked)
12674     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12675 
12676   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12677   SDValue Result =
12678       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12679   Chain = Result.getValue(1);
12680 
12681   if (VT.isFixedLengthVector())
12682     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12683 
12684   return DAG.getMergeValues({Result, Chain}, DL);
12685 }
12686 
12687 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12688 // matched to an RVV indexed store. The RVV indexed store instructions only
12689 // support the "unsigned unscaled" addressing mode; indices are implicitly
12690 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
12691 // signed or scaled indexing is extended to the XLEN value type and scaled
12692 // accordingly.
12693 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12694                                                 SelectionDAG &DAG) const {
12695   SDLoc DL(Op);
12696   const auto *MemSD = cast<MemSDNode>(Op.getNode());
12697   EVT MemVT = MemSD->getMemoryVT();
12698   MachineMemOperand *MMO = MemSD->getMemOperand();
12699   SDValue Chain = MemSD->getChain();
12700   SDValue BasePtr = MemSD->getBasePtr();
12701 
12702   [[maybe_unused]] bool IsTruncatingStore = false;
12703   SDValue Index, Mask, Val, VL;
12704 
12705   if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12706     Index = VPSN->getIndex();
12707     Mask = VPSN->getMask();
12708     Val = VPSN->getValue();
12709     VL = VPSN->getVectorLength();
12710     // VP doesn't support truncating stores.
12711     IsTruncatingStore = false;
12712   } else {
12713     // Else it must be a MSCATTER.
12714     auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12715     Index = MSN->getIndex();
12716     Mask = MSN->getMask();
12717     Val = MSN->getValue();
12718     IsTruncatingStore = MSN->isTruncatingStore();
12719   }
12720 
12721   MVT VT = Val.getSimpleValueType();
12722   MVT IndexVT = Index.getSimpleValueType();
12723   MVT XLenVT = Subtarget.getXLenVT();
12724 
12725   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12726          "Unexpected VTs!");
12727   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12728   // Targets have to explicitly opt in to extending vector loads and
12729   // truncating vector stores.
12730   assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12731 
12732   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12733   // the selection of the masked intrinsics doesn't do this for us.
12734   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12735 
12736   MVT ContainerVT = VT;
12737   if (VT.isFixedLengthVector()) {
12738     ContainerVT = getContainerForFixedLengthVector(VT);
12739     IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12740                                ContainerVT.getVectorElementCount());
12741 
12742     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12743     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12744 
12745     if (!IsUnmasked) {
12746       MVT MaskVT = getMaskTypeFor(ContainerVT);
12747       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12748     }
12749   }
12750 
12751   if (!VL)
12752     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12753 
12754   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12755     IndexVT = IndexVT.changeVectorElementType(XLenVT);
12756     Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12757   }
12758 
12759   unsigned IntID =
12760       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
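        // Assemble the vsoxei/vsoxei_mask operands: chain, intrinsic ID, store
        // value, base pointer, index vector, an optional mask, and the VL.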
12761   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12762   Ops.push_back(Val);
12763   Ops.push_back(BasePtr);
12764   Ops.push_back(Index);
12765   if (!IsUnmasked)
12766     Ops.push_back(Mask);
12767   Ops.push_back(VL);
12768 
12769   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12770                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12771 }
12772 
12773 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12774                                                SelectionDAG &DAG) const {
12775   const MVT XLenVT = Subtarget.getXLenVT();
12776   SDLoc DL(Op);
12777   SDValue Chain = Op->getOperand(0);
12778   SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12779   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12780   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12781 
12782   // Encoding used for rounding mode in RISC-V differs from that used in
12783   // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
12784   // into a table, which consists of a sequence of 4-bit fields, each holding
12785   // the corresponding FLT_ROUNDS mode.
12786   static const int Table =
12787       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12788       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12789       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12790       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12791       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
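        // For example, frm == RTZ (1) selects the 4-bit field at bit position 4
        // of the table, which holds RoundingMode::TowardZero.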
12792 
12793   SDValue Shift =
12794       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12795   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12796                                 DAG.getConstant(Table, DL, XLenVT), Shift);
12797   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12798                                DAG.getConstant(7, DL, XLenVT));
12799 
12800   return DAG.getMergeValues({Masked, Chain}, DL);
12801 }
12802 
12803 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12804                                                SelectionDAG &DAG) const {
12805   const MVT XLenVT = Subtarget.getXLenVT();
12806   SDLoc DL(Op);
12807   SDValue Chain = Op->getOperand(0);
12808   SDValue RMValue = Op->getOperand(1);
12809   SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12810 
12811   // Encoding used for rounding mode in RISC-V differs from that used in
12812   // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
12813   // a table, which consists of a sequence of 4-bit fields, each holding the
12814   // corresponding RISC-V mode.
12815   static const unsigned Table =
12816       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12817       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12818       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12819       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12820       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12821 
12822   RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12823 
12824   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12825                               DAG.getConstant(2, DL, XLenVT));
12826   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12827                                 DAG.getConstant(Table, DL, XLenVT), Shift);
12828   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12829                         DAG.getConstant(0x7, DL, XLenVT));
12830   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12831                      RMValue);
12832 }
12833 
12834 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12835                                                SelectionDAG &DAG) const {
12836   MachineFunction &MF = DAG.getMachineFunction();
12837 
12838   bool isRISCV64 = Subtarget.is64Bit();
12839   EVT PtrVT = getPointerTy(DAG.getDataLayout());
12840 
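        // The DWARF CFA corresponds to the incoming stack pointer, modeled here
        // as a pointer-sized fixed object at offset 0 of the frame.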
12841   int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12842   return DAG.getFrameIndex(FI, PtrVT);
12843 }
12844 
12845 // Returns the opcode of the target-specific SDNode that implements the 32-bit
12846 // form of the given Opcode.
12847 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12848   switch (Opcode) {
12849   default:
12850     llvm_unreachable("Unexpected opcode");
12851   case ISD::SHL:
12852     return RISCVISD::SLLW;
12853   case ISD::SRA:
12854     return RISCVISD::SRAW;
12855   case ISD::SRL:
12856     return RISCVISD::SRLW;
12857   case ISD::SDIV:
12858     return RISCVISD::DIVW;
12859   case ISD::UDIV:
12860     return RISCVISD::DIVUW;
12861   case ISD::UREM:
12862     return RISCVISD::REMUW;
12863   case ISD::ROTL:
12864     return RISCVISD::ROLW;
12865   case ISD::ROTR:
12866     return RISCVISD::RORW;
12867   }
12868 }
12869 
12870 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12871 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12872 // otherwise be promoted to i64, making it difficult to select the
12873 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
12874 // originally of type i8/i16/i32 is lost.
12875 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12876                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
12877   SDLoc DL(N);
12878   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12879   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12880   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12881   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12882   // ReplaceNodeResults requires we maintain the same type for the return value.
12883   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12884 }
12885 
12886 // Converts the given 32-bit operation to an i64 operation with sign-extension
12887 // semantics in order to reduce the number of sign-extension instructions.
12888 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12889   SDLoc DL(N);
12890   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12891   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12892   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12893   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12894                                DAG.getValueType(MVT::i32));
12895   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12896 }
12897 
12898 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12899                                              SmallVectorImpl<SDValue> &Results,
12900                                              SelectionDAG &DAG) const {
12901   SDLoc DL(N);
12902   switch (N->getOpcode()) {
12903   default:
12904     llvm_unreachable("Don't know how to custom type legalize this operation!");
12905   case ISD::STRICT_FP_TO_SINT:
12906   case ISD::STRICT_FP_TO_UINT:
12907   case ISD::FP_TO_SINT:
12908   case ISD::FP_TO_UINT: {
12909     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12910            "Unexpected custom legalisation");
12911     bool IsStrict = N->isStrictFPOpcode();
12912     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12913                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12914     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12915     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12916         TargetLowering::TypeSoftenFloat) {
12917       if (!isTypeLegal(Op0.getValueType()))
12918         return;
12919       if (IsStrict) {
12920         SDValue Chain = N->getOperand(0);
12921         // In the absence of Zfh, promote f16 to f32, then convert.
12922         if (Op0.getValueType() == MVT::f16 &&
12923             !Subtarget.hasStdExtZfhOrZhinx()) {
12924           Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12925                             {Chain, Op0});
12926           Chain = Op0.getValue(1);
12927         }
12928         unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12929                                 : RISCVISD::STRICT_FCVT_WU_RV64;
12930         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12931         SDValue Res = DAG.getNode(
12932             Opc, DL, VTs, Chain, Op0,
12933             DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12934         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12935         Results.push_back(Res.getValue(1));
12936         return;
12937       }
12938       // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12939       // convert.
12940       if ((Op0.getValueType() == MVT::f16 &&
12941            !Subtarget.hasStdExtZfhOrZhinx()) ||
12942           Op0.getValueType() == MVT::bf16)
12943         Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12944 
12945       unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12946       SDValue Res =
12947           DAG.getNode(Opc, DL, MVT::i64, Op0,
12948                       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12949       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12950       return;
12951     }
12952     // If the FP type needs to be softened, emit a library call using the 'si'
12953     // version. If we left it to default legalization we'd end up with 'di'. If
12954     // the FP type doesn't need to be softened just let generic type
12955     // legalization promote the result type.
12956     RTLIB::Libcall LC;
12957     if (IsSigned)
12958       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12959     else
12960       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12961     MakeLibCallOptions CallOptions;
12962     EVT OpVT = Op0.getValueType();
12963     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12964     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12965     SDValue Result;
12966     std::tie(Result, Chain) =
12967         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12968     Results.push_back(Result);
12969     if (IsStrict)
12970       Results.push_back(Chain);
12971     break;
12972   }
12973   case ISD::LROUND: {
12974     SDValue Op0 = N->getOperand(0);
12975     EVT Op0VT = Op0.getValueType();
12976     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12977         TargetLowering::TypeSoftenFloat) {
12978       if (!isTypeLegal(Op0VT))
12979         return;
12980 
12981       // In the absence of Zfh, promote f16 to f32, then convert.
12982       if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12983         Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12984 
12985       SDValue Res =
12986           DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12987                       DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12988       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12989       return;
12990     }
12991     // If the FP type needs to be softened, emit a library call to lround. We'll
12992     // need to truncate the result. We assume any value that doesn't fit in i32
12993     // is allowed to return an unspecified value.
12994     RTLIB::Libcall LC =
12995         Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12996     MakeLibCallOptions CallOptions;
12997     EVT OpVT = Op0.getValueType();
12998     CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12999     SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
13000     Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
13001     Results.push_back(Result);
13002     break;
13003   }
13004   case ISD::READCYCLECOUNTER:
13005   case ISD::READSTEADYCOUNTER: {
13006     assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
13007                                    "has custom type legalization on riscv32");
13008 
13009     SDValue LoCounter, HiCounter;
13010     MVT XLenVT = Subtarget.getXLenVT();
13011     if (N->getOpcode() == ISD::READCYCLECOUNTER) {
13012       LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
13013       HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
13014     } else {
13015       LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
13016       HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
13017     }
13018     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
13019     SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
13020                               N->getOperand(0), LoCounter, HiCounter);
13021 
13022     Results.push_back(
13023         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
13024     Results.push_back(RCW.getValue(2));
13025     break;
13026   }
13027   case ISD::LOAD: {
13028     if (!ISD::isNON_EXTLoad(N))
13029       return;
13030 
13031     // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13032     // sext_inreg we emit for ADD/SUB/MUL/SLLI.
13033     LoadSDNode *Ld = cast<LoadSDNode>(N);
13034 
13035     SDLoc dl(N);
13036     SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13037                                  Ld->getBasePtr(), Ld->getMemoryVT(),
13038                                  Ld->getMemOperand());
13039     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13040     Results.push_back(Res.getValue(1));
13041     return;
13042   }
13043   case ISD::MUL: {
13044     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
13045     unsigned XLen = Subtarget.getXLen();
13046     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
13047     if (Size > XLen) {
13048       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
13049       SDValue LHS = N->getOperand(0);
13050       SDValue RHS = N->getOperand(1);
13051       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
13052 
13053       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13054       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13055       // We need exactly one side to be unsigned.
13056       if (LHSIsU == RHSIsU)
13057         return;
13058 
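            // Form the double-width product from an XLen-wide MUL (low half)
            // and MULHSU (high half), where S is the signed operand and U is
            // the unsigned operand.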
13059       auto MakeMULPair = [&](SDValue S, SDValue U) {
13060         MVT XLenVT = Subtarget.getXLenVT();
13061         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
13062         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
13063         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
13064         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
13065         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13066       };
13067 
13068       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13069       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13070 
13071       // The other operand should be signed, but still prefer MULH when
13072       // possible.
13073       if (RHSIsU && LHSIsS && !RHSIsS)
13074         Results.push_back(MakeMULPair(LHS, RHS));
13075       else if (LHSIsU && RHSIsS && !LHSIsS)
13076         Results.push_back(MakeMULPair(RHS, LHS));
13077 
13078       return;
13079     }
13080     [[fallthrough]];
13081   }
13082   case ISD::ADD:
13083   case ISD::SUB:
13084     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13085            "Unexpected custom legalisation");
13086     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13087     break;
13088   case ISD::SHL:
13089   case ISD::SRA:
13090   case ISD::SRL:
13091     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13092            "Unexpected custom legalisation");
13093     if (N->getOperand(1).getOpcode() != ISD::Constant) {
13094       // If we can use a BSET instruction, allow default promotion to apply.
13095       if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13096           isOneConstant(N->getOperand(0)))
13097         break;
13098       Results.push_back(customLegalizeToWOp(N, DAG));
13099       break;
13100     }
13101 
13102     // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13103     // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13104     // shift amount.
13105     if (N->getOpcode() == ISD::SHL) {
13106       SDLoc DL(N);
13107       SDValue NewOp0 =
13108           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13109       SDValue NewOp1 =
13110           DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13111       SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13112       SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13113                                    DAG.getValueType(MVT::i32));
13114       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13115     }
13116 
13117     break;
13118   case ISD::ROTL:
13119   case ISD::ROTR:
13120     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13121            "Unexpected custom legalisation");
13122     assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13123             Subtarget.hasVendorXTHeadBb()) &&
13124            "Unexpected custom legalization");
13125     if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13126         !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13127       return;
13128     Results.push_back(customLegalizeToWOp(N, DAG));
13129     break;
13130   case ISD::CTTZ:
13131   case ISD::CTTZ_ZERO_UNDEF:
13132   case ISD::CTLZ:
13133   case ISD::CTLZ_ZERO_UNDEF: {
13134     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13135            "Unexpected custom legalisation");
13136 
13137     SDValue NewOp0 =
13138         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13139     bool IsCTZ =
13140         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13141     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13142     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13143     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13144     return;
13145   }
13146   case ISD::SDIV:
13147   case ISD::UDIV:
13148   case ISD::UREM: {
13149     MVT VT = N->getSimpleValueType(0);
13150     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13151            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13152            "Unexpected custom legalisation");
13153     // Don't promote division/remainder by a constant, since we should expand
13154     // those to a multiply by a magic constant.
13155     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13156     if (N->getOperand(1).getOpcode() == ISD::Constant &&
13157         !isIntDivCheap(N->getValueType(0), Attr))
13158       return;
13159 
13160     // If the input is i32, use ANY_EXTEND since the W instructions don't read
13161     // the upper 32 bits. For other types we need to sign or zero extend
13162     // based on the opcode.
13163     unsigned ExtOpc = ISD::ANY_EXTEND;
13164     if (VT != MVT::i32)
13165       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13166                                            : ISD::ZERO_EXTEND;
13167 
13168     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13169     break;
13170   }
13171   case ISD::SADDO: {
13172     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13173            "Unexpected custom legalisation");
13174 
13175     // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13176     // use the default legalization.
13177     if (!isa<ConstantSDNode>(N->getOperand(1)))
13178       return;
13179 
13180     SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13181     SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13182     SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13183     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13184                       DAG.getValueType(MVT::i32));
13185 
13186     SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13187 
13188     // For an addition, the result should be less than one of the operands (LHS)
13189     // if and only if the other operand (RHS) is negative, otherwise there will
13190     // be overflow.
13191     // For a subtraction, the result should be less than one of the operands
13192     // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13193     // otherwise there will be overflow.
13194     EVT OType = N->getValueType(1);
13195     SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13196     SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13197 
13198     SDValue Overflow =
13199         DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13200     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13201     Results.push_back(Overflow);
13202     return;
13203   }
13204   case ISD::UADDO:
13205   case ISD::USUBO: {
13206     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13207            "Unexpected custom legalisation");
13208     bool IsAdd = N->getOpcode() == ISD::UADDO;
13209     // Create an ADDW or SUBW.
13210     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13211     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13212     SDValue Res =
13213         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13214     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13215                       DAG.getValueType(MVT::i32));
13216 
13217     SDValue Overflow;
13218     if (IsAdd && isOneConstant(RHS)) {
13219       // Special case: uaddo X, 1 overflows iff the addition result is 0.
13220       // The general case (X + C) < C is not necessarily beneficial. Although
13221       // it reduces the live range of X, it may force materialization of the
13222       // constant C, especially when the setcc result feeds a branch, since
13223       // RISC-V has no compare-with-constant branch instructions.
13224       Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13225                               DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13226     } else if (IsAdd && isAllOnesConstant(RHS)) {
13227       // Special case: uaddo X, -1 overflows iff X != 0.
13228       Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13229                               DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13230     } else {
13231       // Sign extend the LHS and perform an unsigned compare with the ADDW
13232       // result. Since the inputs are sign extended from i32, this is equivalent
13233       // to comparing the lower 32 bits.
13234       LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13235       Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13236                               IsAdd ? ISD::SETULT : ISD::SETUGT);
13237     }
13238 
13239     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13240     Results.push_back(Overflow);
13241     return;
13242   }
13243   case ISD::UADDSAT:
13244   case ISD::USUBSAT: {
13245     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13246            !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13247     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13248     // promotion for UADDO/USUBO.
13249     Results.push_back(expandAddSubSat(N, DAG));
13250     return;
13251   }
13252   case ISD::SADDSAT:
13253   case ISD::SSUBSAT: {
13254     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13255            "Unexpected custom legalisation");
13256     Results.push_back(expandAddSubSat(N, DAG));
13257     return;
13258   }
13259   case ISD::ABS: {
13260     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13261            "Unexpected custom legalisation");
13262 
13263     if (Subtarget.hasStdExtZbb()) {
13264       // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13265       // This allows us to remember that the result is sign extended. Expanding
13266       // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13267       SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13268                                 N->getOperand(0));
13269       SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13270       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13271       return;
13272     }
13273 
13274     // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
13275     SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13276 
13277     // Freeze the source so we can increase its use count.
13278     Src = DAG.getFreeze(Src);
13279 
13280     // Copy sign bit to all bits using the sraiw pattern.
13281     SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13282                                    DAG.getValueType(MVT::i32));
13283     SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13284                            DAG.getConstant(31, DL, MVT::i64));
13285 
13286     SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13287     NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13288 
13289     // NOTE: The result is only required to be anyextended, but sext is
13290     // consistent with type legalization of sub.
13291     NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13292                          DAG.getValueType(MVT::i32));
13293     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13294     return;
13295   }
13296   case ISD::BITCAST: {
13297     EVT VT = N->getValueType(0);
13298     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13299     SDValue Op0 = N->getOperand(0);
13300     EVT Op0VT = Op0.getValueType();
13301     MVT XLenVT = Subtarget.getXLenVT();
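          // Scalar FP-to-integer bitcasts are lowered with FP move nodes followed
          // by a truncate, or by splitting an f64 into two i32 halves on RV32.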
13302     if (VT == MVT::i16 &&
13303         ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13304          (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13305       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13306       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13307     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13308                Subtarget.hasStdExtFOrZfinx()) {
13309       SDValue FPConv =
13310           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13311       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13312     } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13313                Subtarget.hasStdExtDOrZdinx()) {
13314       SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13315                                    DAG.getVTList(MVT::i32, MVT::i32), Op0);
13316       SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13317                                    NewReg.getValue(0), NewReg.getValue(1));
13318       Results.push_back(RetReg);
13319     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13320                isTypeLegal(Op0VT)) {
13321       // Custom-legalize bitcasts from fixed-length vector types to illegal
13322       // scalar types in order to improve codegen. Bitcast the vector to a
13323       // one-element vector type whose element type is the same as the result
13324       // type, and extract the first element.
13325       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13326       if (isTypeLegal(BVT)) {
13327         SDValue BVec = DAG.getBitcast(BVT, Op0);
13328         Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13329                                       DAG.getVectorIdxConstant(0, DL)));
13330       }
13331     }
13332     break;
13333   }
13334   case RISCVISD::BREV8:
13335   case RISCVISD::ORC_B: {
13336     MVT VT = N->getSimpleValueType(0);
13337     MVT XLenVT = Subtarget.getXLenVT();
13338     assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13339            "Unexpected custom legalisation");
13340     assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13341             (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13342            "Unexpected extension");
13343     SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13344     SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13345     // ReplaceNodeResults requires we maintain the same type for the return
13346     // value.
13347     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13348     break;
13349   }
13350   case ISD::EXTRACT_VECTOR_ELT: {
13351     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13352     // type is illegal (currently only vXi64 RV32).
13353     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13354     // transferred to the destination register. We issue two of these from the
13355   // upper and lower halves of the SEW-bit vector element, slid down to the
13356     // first element.
13357     SDValue Vec = N->getOperand(0);
13358     SDValue Idx = N->getOperand(1);
13359 
13360     // The vector type hasn't been legalized yet so we can't issue target
13361     // specific nodes if it needs legalization.
13362     // FIXME: We would manually legalize if it's important.
13363     if (!isTypeLegal(Vec.getValueType()))
13364       return;
13365 
13366     MVT VecVT = Vec.getSimpleValueType();
13367 
13368     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13369            VecVT.getVectorElementType() == MVT::i64 &&
13370            "Unexpected EXTRACT_VECTOR_ELT legalization");
13371 
13372     // If this is a fixed vector, we need to convert it to a scalable vector.
13373     MVT ContainerVT = VecVT;
13374     if (VecVT.isFixedLengthVector()) {
13375       ContainerVT = getContainerForFixedLengthVector(VecVT);
13376       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13377     }
13378 
13379     MVT XLenVT = Subtarget.getXLenVT();
13380 
13381     // Use a VL of 1 to avoid processing more elements than we need.
13382     auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13383 
13384     // Unless the index is known to be 0, we must slide the vector down to get
13385     // the desired element into index 0.
13386     if (!isNullConstant(Idx)) {
13387       Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13388                           DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13389     }
13390 
13391     // Extract the lower XLEN bits of the correct vector element.
13392     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13393 
13394     // To extract the upper XLEN bits of the vector element, shift the first
13395     // element right by 32 bits and re-extract the lower XLEN bits.
13396     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13397                                      DAG.getUNDEF(ContainerVT),
13398                                      DAG.getConstant(32, DL, XLenVT), VL);
13399     SDValue LShr32 =
13400         DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13401                     DAG.getUNDEF(ContainerVT), Mask, VL);
13402 
13403     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13404 
13405     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13406     break;
13407   }
13408   case ISD::INTRINSIC_WO_CHAIN: {
13409     unsigned IntNo = N->getConstantOperandVal(0);
13410     switch (IntNo) {
13411     default:
13412       llvm_unreachable(
13413           "Don't know how to custom type legalize this intrinsic!");
13414     case Intrinsic::experimental_get_vector_length: {
13415       SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13416       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13417       return;
13418     }
13419     case Intrinsic::experimental_cttz_elts: {
13420       SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13421       Results.push_back(
13422           DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13423       return;
13424     }
13425     case Intrinsic::riscv_orc_b:
13426     case Intrinsic::riscv_brev8:
13427     case Intrinsic::riscv_sha256sig0:
13428     case Intrinsic::riscv_sha256sig1:
13429     case Intrinsic::riscv_sha256sum0:
13430     case Intrinsic::riscv_sha256sum1:
13431     case Intrinsic::riscv_sm3p0:
13432     case Intrinsic::riscv_sm3p1: {
13433       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13434         return;
13435       unsigned Opc;
13436       switch (IntNo) {
13437       case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
13438       case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
13439       case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13440       case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13441       case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13442       case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13443       case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
13444       case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
13445       }
13446 
13447       SDValue NewOp =
13448           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13449       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13450       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13451       return;
13452     }
13453     case Intrinsic::riscv_sm4ks:
13454     case Intrinsic::riscv_sm4ed: {
13455       unsigned Opc =
13456           IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13457       SDValue NewOp0 =
13458           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13459       SDValue NewOp1 =
13460           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13461       SDValue Res =
13462           DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13463       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13464       return;
13465     }
13466     case Intrinsic::riscv_mopr: {
13467       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13468         return;
13469       SDValue NewOp =
13470           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13471       SDValue Res = DAG.getNode(
13472           RISCVISD::MOPR, DL, MVT::i64, NewOp,
13473           DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13474       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13475       return;
13476     }
13477     case Intrinsic::riscv_moprr: {
13478       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13479         return;
13480       SDValue NewOp0 =
13481           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13482       SDValue NewOp1 =
13483           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13484       SDValue Res = DAG.getNode(
13485           RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13486           DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13487       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13488       return;
13489     }
13490     case Intrinsic::riscv_clmul: {
13491       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13492         return;
13493 
13494       SDValue NewOp0 =
13495           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13496       SDValue NewOp1 =
13497           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13498       SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13499       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13500       return;
13501     }
13502     case Intrinsic::riscv_clmulh:
13503     case Intrinsic::riscv_clmulr: {
13504       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13505         return;
13506 
13507       // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13508       // to the full 128-bit clmul result of multiplying two xlen values.
13509       // Perform clmulr or clmulh on the shifted values. Finally, extract the
13510       // upper 32 bits.
13511       //
13512       // The alternative is to mask the inputs to 32 bits and use clmul, but
13513       // that requires two shifts to mask each input without zext.w.
13514       // FIXME: If the inputs are known zero extended or could be freely
13515       // zero extended, the mask form would be better.
13516       SDValue NewOp0 =
13517           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13518       SDValue NewOp1 =
13519           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13520       NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13521                            DAG.getConstant(32, DL, MVT::i64));
13522       NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13523                            DAG.getConstant(32, DL, MVT::i64));
13524       unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13525                                                       : RISCVISD::CLMULR;
13526       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13527       Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13528                         DAG.getConstant(32, DL, MVT::i64));
13529       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13530       return;
13531     }
13532     case Intrinsic::riscv_vmv_x_s: {
13533       EVT VT = N->getValueType(0);
13534       MVT XLenVT = Subtarget.getXLenVT();
13535       if (VT.bitsLT(XLenVT)) {
13536         // Simple case just extract using vmv.x.s and truncate.
13537         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13538                                       Subtarget.getXLenVT(), N->getOperand(1));
13539         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13540         return;
13541       }
13542 
13543       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13544              "Unexpected custom legalization");
13545 
13546       // We need to do the move in two steps.
13547       SDValue Vec = N->getOperand(1);
13548       MVT VecVT = Vec.getSimpleValueType();
13549 
13550       // First extract the lower XLEN bits of the element.
13551       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13552 
13553       // To extract the upper XLEN bits of the vector element, shift the first
13554       // element right by 32 bits and re-extract the lower XLEN bits.
13555       auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13556 
13557       SDValue ThirtyTwoV =
13558           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13559                       DAG.getConstant(32, DL, XLenVT), VL);
13560       SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13561                                    DAG.getUNDEF(VecVT), Mask, VL);
13562       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13563 
13564       Results.push_back(
13565           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13566       break;
13567     }
13568     }
13569     break;
13570   }
13571   case ISD::VECREDUCE_ADD:
13572   case ISD::VECREDUCE_AND:
13573   case ISD::VECREDUCE_OR:
13574   case ISD::VECREDUCE_XOR:
13575   case ISD::VECREDUCE_SMAX:
13576   case ISD::VECREDUCE_UMAX:
13577   case ISD::VECREDUCE_SMIN:
13578   case ISD::VECREDUCE_UMIN:
13579     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13580       Results.push_back(V);
13581     break;
13582   case ISD::VP_REDUCE_ADD:
13583   case ISD::VP_REDUCE_AND:
13584   case ISD::VP_REDUCE_OR:
13585   case ISD::VP_REDUCE_XOR:
13586   case ISD::VP_REDUCE_SMAX:
13587   case ISD::VP_REDUCE_UMAX:
13588   case ISD::VP_REDUCE_SMIN:
13589   case ISD::VP_REDUCE_UMIN:
13590     if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13591       Results.push_back(V);
13592     break;
13593   case ISD::GET_ROUNDING: {
13594     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13595     SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13596     Results.push_back(Res.getValue(0));
13597     Results.push_back(Res.getValue(1));
13598     break;
13599   }
13600   }
13601 }
13602 
13603 /// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13604 /// which corresponds to it.
13605 static unsigned getVecReduceOpcode(unsigned Opc) {
13606   switch (Opc) {
13607   default:
13608     llvm_unreachable("Unhandled binary to transform reduction");
13609   case ISD::ADD:
13610     return ISD::VECREDUCE_ADD;
13611   case ISD::UMAX:
13612     return ISD::VECREDUCE_UMAX;
13613   case ISD::SMAX:
13614     return ISD::VECREDUCE_SMAX;
13615   case ISD::UMIN:
13616     return ISD::VECREDUCE_UMIN;
13617   case ISD::SMIN:
13618     return ISD::VECREDUCE_SMIN;
13619   case ISD::AND:
13620     return ISD::VECREDUCE_AND;
13621   case ISD::OR:
13622     return ISD::VECREDUCE_OR;
13623   case ISD::XOR:
13624     return ISD::VECREDUCE_XOR;
13625   case ISD::FADD:
13626     // Note: This is the associative form of the generic reduction opcode.
13627     return ISD::VECREDUCE_FADD;
13628   }
13629 }
13630 
13631 /// Perform two related transforms whose purpose is to incrementally recognize
13632 /// an explode_vector followed by scalar reduction as a vector reduction node.
13633 /// This exists to recover from a deficiency in SLP which can't handle
13634 /// forests with multiple roots sharing common nodes.  In some cases, one
13635 /// of the trees will be vectorized, and the other will remain (unprofitably)
13636 /// scalarized.
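      /// Illustrative example (hypothetical IR, not from the source): given a
      /// v4i32 %v, the scalar chain
      ///   t0 = add (extractelt %v, 0), (extractelt %v, 1)
      ///   t1 = add t0, (extractelt %v, 2)
      /// is first rewritten so that t0 becomes a vecreduce_add over the first
      /// two lanes of %v, and the second combine then grows it to a
      /// vecreduce_add over the first three lanes (an illegal v3i32 that later
      /// combines or type legalization handle).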
13637 static SDValue
13638 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13639                                   const RISCVSubtarget &Subtarget) {
13640 
13641   // This transform needs to run before all integer types have been legalized
13642   // to i64 (so that the vector element type matches the add type), and while
13643   // it's safe to introduce odd-sized vector types.
13644   if (DAG.NewNodesMustHaveLegalTypes)
13645     return SDValue();
13646 
13647   // Without V, this transform isn't useful.  We could form the (illegal)
13648   // operations and let them be scalarized again, but there's really no point.
13649   if (!Subtarget.hasVInstructions())
13650     return SDValue();
13651 
13652   const SDLoc DL(N);
13653   const EVT VT = N->getValueType(0);
13654   const unsigned Opc = N->getOpcode();
13655 
13656   // For FADD, we only handle the case with reassociation allowed.  We
13657   // could handle strict reduction order, but at the moment, there's no
13658   // known reason to, and the complexity isn't worth it.
13659   // TODO: Handle fminnum and fmaxnum here
13660   if (!VT.isInteger() &&
13661       (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13662     return SDValue();
13663 
13664   const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13665   assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13666          "Inconsistent mappings");
13667   SDValue LHS = N->getOperand(0);
13668   SDValue RHS = N->getOperand(1);
13669 
13670   if (!LHS.hasOneUse() || !RHS.hasOneUse())
13671     return SDValue();
13672 
13673   if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13674     std::swap(LHS, RHS);
13675 
13676   if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13677       !isa<ConstantSDNode>(RHS.getOperand(1)))
13678     return SDValue();
13679 
13680   uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13681   SDValue SrcVec = RHS.getOperand(0);
13682   EVT SrcVecVT = SrcVec.getValueType();
13683   assert(SrcVecVT.getVectorElementType() == VT);
13684   if (SrcVecVT.isScalableVector())
13685     return SDValue();
13686 
13687   if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13688     return SDValue();
13689 
13690   // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13691   // reduce_op (extract_subvector [2 x VT] from V).  This will form the
13692   // root of our reduction tree. TODO: We could extend this to any two
13693   // adjacent aligned constant indices if desired.
13694   if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13695       LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13696     uint64_t LHSIdx =
13697       cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13698     if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13699       EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13700       SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13701                                 DAG.getVectorIdxConstant(0, DL));
13702       return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13703     }
13704   }
13705 
13706   // Match (binop (reduce (extract_subvector V, 0)),
13707   //              (extract_vector_elt V, sizeof(SubVec)))
13708   // into a reduction of one more element from the original vector V.
13709   if (LHS.getOpcode() != ReduceOpc)
13710     return SDValue();
13711 
13712   SDValue ReduceVec = LHS.getOperand(0);
13713   if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13714       ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13715       isNullConstant(ReduceVec.getOperand(1)) &&
13716       ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13717     // For illegal types (e.g. 3xi32), most will be combined again into a
13718     // wider (hopefully legal) type.  If this is a terminal state, we are
13719     // relying on type legalization here to produce something reasonable
13720     // and this lowering quality could probably be improved. (TODO)
13721     EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13722     SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13723                               DAG.getVectorIdxConstant(0, DL));
13724     return DAG.getNode(ReduceOpc, DL, VT, Vec,
13725                        ReduceVec->getFlags() & N->getFlags());
13726   }
13727 
13728   return SDValue();
13729 }
13730 
13731 
13732 // Try to fold (<bop> x, (reduction.<bop> vec, start))
13733 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13734                                     const RISCVSubtarget &Subtarget) {
13735   auto BinOpToRVVReduce = [](unsigned Opc) {
13736     switch (Opc) {
13737     default:
13738       llvm_unreachable("Unhandled binary to transform reduction");
13739     case ISD::ADD:
13740       return RISCVISD::VECREDUCE_ADD_VL;
13741     case ISD::UMAX:
13742       return RISCVISD::VECREDUCE_UMAX_VL;
13743     case ISD::SMAX:
13744       return RISCVISD::VECREDUCE_SMAX_VL;
13745     case ISD::UMIN:
13746       return RISCVISD::VECREDUCE_UMIN_VL;
13747     case ISD::SMIN:
13748       return RISCVISD::VECREDUCE_SMIN_VL;
13749     case ISD::AND:
13750       return RISCVISD::VECREDUCE_AND_VL;
13751     case ISD::OR:
13752       return RISCVISD::VECREDUCE_OR_VL;
13753     case ISD::XOR:
13754       return RISCVISD::VECREDUCE_XOR_VL;
13755     case ISD::FADD:
13756       return RISCVISD::VECREDUCE_FADD_VL;
13757     case ISD::FMAXNUM:
13758       return RISCVISD::VECREDUCE_FMAX_VL;
13759     case ISD::FMINNUM:
13760       return RISCVISD::VECREDUCE_FMIN_VL;
13761     }
13762   };
13763 
13764   auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13765     return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13766            isNullConstant(V.getOperand(1)) &&
13767            V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13768   };
13769 
13770   unsigned Opc = N->getOpcode();
13771   unsigned ReduceIdx;
13772   if (IsReduction(N->getOperand(0), Opc))
13773     ReduceIdx = 0;
13774   else if (IsReduction(N->getOperand(1), Opc))
13775     ReduceIdx = 1;
13776   else
13777     return SDValue();
13778 
13779   // Skip if FADD disallows reassociation but the combiner needs it.
13780   if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13781     return SDValue();
13782 
13783   SDValue Extract = N->getOperand(ReduceIdx);
13784   SDValue Reduce = Extract.getOperand(0);
13785   if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13786     return SDValue();
13787 
13788   SDValue ScalarV = Reduce.getOperand(2);
13789   EVT ScalarVT = ScalarV.getValueType();
13790   if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13791       ScalarV.getOperand(0)->isUndef() &&
13792       isNullConstant(ScalarV.getOperand(2)))
13793     ScalarV = ScalarV.getOperand(1);
13794 
13795   // Make sure that ScalarV is a splat with VL=1.
13796   if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13797       ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13798       ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13799     return SDValue();
13800 
13801   if (!isNonZeroAVL(ScalarV.getOperand(2)))
13802     return SDValue();
13803 
13804   // Check that the scalar of ScalarV is the neutral element.
13805   // TODO: Deal with values other than the neutral element.
13806   if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13807                          0))
13808     return SDValue();
13809 
13810   // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13811   // FIXME: We might be able to improve this if operand 0 is undef.
13812   if (!isNonZeroAVL(Reduce.getOperand(5)))
13813     return SDValue();
13814 
13815   SDValue NewStart = N->getOperand(1 - ReduceIdx);
13816 
13817   SDLoc DL(N);
13818   SDValue NewScalarV =
13819       lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13820                         ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13821 
13822   // If we looked through an INSERT_SUBVECTOR we need to restore it.
13823   if (ScalarVT != ScalarV.getValueType())
13824     NewScalarV =
13825         DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13826                     NewScalarV, DAG.getVectorIdxConstant(0, DL));
13827 
13828   SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13829                    NewScalarV,           Reduce.getOperand(3),
13830                    Reduce.getOperand(4), Reduce.getOperand(5)};
13831   SDValue NewReduce =
13832       DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13833   return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13834                      Extract.getOperand(1));
13835 }
13836 
13837 // Optimize (add (shl x, c0), (shl y, c1)) ->
13838 //          (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
13839 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13840                                   const RISCVSubtarget &Subtarget) {
13841   // Perform this optimization only when the Zba extension is available.
13842   if (!Subtarget.hasStdExtZba())
13843     return SDValue();
13844 
13845   // Skip for vector types and larger types.
13846   EVT VT = N->getValueType(0);
13847   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13848     return SDValue();
13849 
13850   // The two operand nodes must be SHL and have no other use.
13851   SDValue N0 = N->getOperand(0);
13852   SDValue N1 = N->getOperand(1);
13853   if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13854       !N0->hasOneUse() || !N1->hasOneUse())
13855     return SDValue();
13856 
13857   // Check c0 and c1.
13858   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13859   auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13860   if (!N0C || !N1C)
13861     return SDValue();
13862   int64_t C0 = N0C->getSExtValue();
13863   int64_t C1 = N1C->getSExtValue();
13864   if (C0 <= 0 || C1 <= 0)
13865     return SDValue();
13866 
13867   // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13868   int64_t Bits = std::min(C0, C1);
13869   int64_t Diff = std::abs(C0 - C1);
13870   if (Diff != 1 && Diff != 2 && Diff != 3)
13871     return SDValue();
13872 
13873   // Build nodes.
13874   SDLoc DL(N);
13875   SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13876   SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13877   SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13878                               DAG.getConstant(Diff, DL, VT), NS);
13879   return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13880 }
13881 
13882 // Combine a constant select operand into its use:
13883 //
13884 // (and (select cond, -1, c), x)
13885 //   -> (select cond, x, (and x, c))  [AllOnes=1]
13886 // (or  (select cond, 0, c), x)
13887 //   -> (select cond, x, (or x, c))  [AllOnes=0]
13888 // (xor (select cond, 0, c), x)
13889 //   -> (select cond, x, (xor x, c))  [AllOnes=0]
13890 // (add (select cond, 0, c), x)
13891 //   -> (select cond, x, (add x, c))  [AllOnes=0]
13892 // (sub x, (select cond, 0, c))
13893 //   -> (select cond, x, (sub x, c))  [AllOnes=0]
13894 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13895                                    SelectionDAG &DAG, bool AllOnes,
13896                                    const RISCVSubtarget &Subtarget) {
13897   EVT VT = N->getValueType(0);
13898 
13899   // Skip vectors.
13900   if (VT.isVector())
13901     return SDValue();
13902 
13903   if (!Subtarget.hasConditionalMoveFusion()) {
13904     // (select cond, x, (and x, c)) has custom lowering with Zicond.
13905     if ((!Subtarget.hasStdExtZicond() &&
13906          !Subtarget.hasVendorXVentanaCondOps()) ||
13907         N->getOpcode() != ISD::AND)
13908       return SDValue();
13909 
13910     // Maybe harmful when condition code has multiple uses.
13911     if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13912       return SDValue();
13913 
13914     // Maybe harmful when VT is wider than XLen.
13915     if (VT.getSizeInBits() > Subtarget.getXLen())
13916       return SDValue();
13917   }
13918 
13919   if ((Slct.getOpcode() != ISD::SELECT &&
13920        Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13921       !Slct.hasOneUse())
13922     return SDValue();
13923 
13924   auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13925     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13926   };
13927 
13928   bool SwapSelectOps;
13929   unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13930   SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13931   SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13932   SDValue NonConstantVal;
13933   if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13934     SwapSelectOps = false;
13935     NonConstantVal = FalseVal;
13936   } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13937     SwapSelectOps = true;
13938     NonConstantVal = TrueVal;
13939   } else
13940     return SDValue();
13941 
13942   // Slct is now known to be the desired identity constant when CC is true.
13943   TrueVal = OtherOp;
13944   FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13945   // Unless SwapSelectOps says the condition should be false.
13946   if (SwapSelectOps)
13947     std::swap(TrueVal, FalseVal);
13948 
13949   if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13950     return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13951                        {Slct.getOperand(0), Slct.getOperand(1),
13952                         Slct.getOperand(2), TrueVal, FalseVal});
13953 
13954   return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13955                      {Slct.getOperand(0), TrueVal, FalseVal});
13956 }
13957 
13958 // Attempt combineSelectAndUse on each operand of a commutative operator N.
13959 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13960                                               bool AllOnes,
13961                                               const RISCVSubtarget &Subtarget) {
13962   SDValue N0 = N->getOperand(0);
13963   SDValue N1 = N->getOperand(1);
13964   if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13965     return Result;
13966   if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13967     return Result;
13968   return SDValue();
13969 }
13970 
13971 // Transform (add (mul x, c0), c1) ->
13972 //           (add (mul (add x, c1/c0), c0), c1%c0).
13973 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13974 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
13975 // to an infinite loop in DAGCombine if transformed.
13976 // Or transform (add (mul x, c0), c1) ->
13977 //              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13978 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13979 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13980 // lead to an infinite loop in DAGCombine if transformed.
13981 // Or transform (add (mul x, c0), c1) ->
13982 //              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13983 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13984 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13985 // lead to an infinite loop in DAGCombine if transformed.
13986 // Or transform (add (mul x, c0), c1) ->
13987 //              (mul (add x, c1/c0), c0).
13988 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
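      // Illustrative example (hypothetical constants): (add (mul x, 100), 4123).
      // Here c1 = 4123 is not simm12, but c1/c0 = 41 and c1%c0 = 23 are, and
      // c0*(c1/c0) = 4100 is not simm12, so the first form applies:
      //   (add (mul (add x, 41), 100), 23) == 100*x + 4100 + 23 == 100*x + 4123.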
13989 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13990                                      const RISCVSubtarget &Subtarget) {
13991   // Skip for vector types and larger types.
13992   EVT VT = N->getValueType(0);
13993   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13994     return SDValue();
13995   // The first operand node must be a MUL and have no other use.
13996   SDValue N0 = N->getOperand(0);
13997   if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13998     return SDValue();
13999   // Check if c0 and c1 match the above conditions.
14000   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14001   auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14002   if (!N0C || !N1C)
14003     return SDValue();
14004   // If N0C has multiple uses it's possible one of the cases in
14005   // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
14006   // in an infinite loop.
14007   if (!N0C->hasOneUse())
14008     return SDValue();
14009   int64_t C0 = N0C->getSExtValue();
14010   int64_t C1 = N1C->getSExtValue();
14011   int64_t CA, CB;
14012   if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
14013     return SDValue();
14014   // Search for a proper CA (non-zero) and CB that are both simm12.
14015   if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
14016       !isInt<12>(C0 * (C1 / C0))) {
14017     CA = C1 / C0;
14018     CB = C1 % C0;
14019   } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
14020              isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
14021     CA = C1 / C0 + 1;
14022     CB = C1 % C0 - C0;
14023   } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14024              isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
14025     CA = C1 / C0 - 1;
14026     CB = C1 % C0 + C0;
14027   } else
14028     return SDValue();
14029   // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
14030   SDLoc DL(N);
14031   SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
14032                              DAG.getSignedConstant(CA, DL, VT));
14033   SDValue New1 =
14034       DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
14035   return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
14036 }
14037 
14038 // add (zext, zext) -> zext (add (zext, zext))
14039 // sub (zext, zext) -> sext (sub (zext, zext))
14040 // mul (zext, zext) -> zext (mul (zext, zext))
14041 // sdiv (zext, zext) -> zext (sdiv (zext, zext))
14042 // udiv (zext, zext) -> zext (udiv (zext, zext))
14043 // srem (zext, zext) -> zext (srem (zext, zext))
14044 // urem (zext, zext) -> zext (urem (zext, zext))
14045 //
14046 // where the sum of the extend widths matches, and the range of the bin op
14047 // fits inside the width of the narrower bin op. (For profitability on rvv, we
14048 // use a power of two for both inner and outer extend.)
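      // Illustrative example (hypothetical types): add (zext v4i8 X to v4i32),
      // (zext v4i8 Y to v4i32) becomes zext (add (zext X to v4i16),
      // (zext Y to v4i16)) to v4i32; the v4i16 add cannot wrap because each
      // operand is at most 255.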
14049 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
14050 
14051   EVT VT = N->getValueType(0);
14052   if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
14053     return SDValue();
14054 
14055   SDValue N0 = N->getOperand(0);
14056   SDValue N1 = N->getOperand(1);
14057   if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
14058     return SDValue();
14059   if (!N0.hasOneUse() || !N1.hasOneUse())
14060     return SDValue();
14061 
14062   SDValue Src0 = N0.getOperand(0);
14063   SDValue Src1 = N1.getOperand(0);
14064   EVT SrcVT = Src0.getValueType();
14065   if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14066       SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14067       SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14068     return SDValue();
14069 
14070   LLVMContext &C = *DAG.getContext();
14071   EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
14072   EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14073 
14074   Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
14075   Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
14076 
14077   // Src0 and Src1 are zero extended, so they're always positive if signed.
14078   //
14079   // sub can produce a negative from two positive operands, so it needs to be
14080   // sign extended. Other nodes produce a positive from two positive operands,
14081   // so zero extend instead.
14082   unsigned OuterExtend =
14083       N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14084 
14085   return DAG.getNode(
14086       OuterExtend, SDLoc(N), VT,
14087       DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
14088 }
14089 
14090 // Try to turn (add (xor bool, 1), -1) into (neg bool).
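      // Informal check: if bool == 0 then (0 ^ 1) - 1 == 0 == -0, and if bool == 1
      // then (1 ^ 1) - 1 == -1, so the fold holds whenever the first xor operand
      // is known to be 0 or 1 (verified by the mask check below).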
14091 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
14092   SDValue N0 = N->getOperand(0);
14093   SDValue N1 = N->getOperand(1);
14094   EVT VT = N->getValueType(0);
14095   SDLoc DL(N);
14096 
14097   // RHS should be -1.
14098   if (!isAllOnesConstant(N1))
14099     return SDValue();
14100 
14101   // Look for (xor X, 1).
14102   if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14103     return SDValue();
14104 
14105   // First xor input should be 0 or 1.
14106   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14107   if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14108     return SDValue();
14109 
14110   // Emit a negate of the setcc.
14111   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14112                      N0.getOperand(0));
14113 }
14114 
14115 static SDValue performADDCombine(SDNode *N,
14116                                  TargetLowering::DAGCombinerInfo &DCI,
14117                                  const RISCVSubtarget &Subtarget) {
14118   SelectionDAG &DAG = DCI.DAG;
14119   if (SDValue V = combineAddOfBooleanXor(N, DAG))
14120     return V;
14121   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14122     return V;
14123   if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14124     if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14125       return V;
14126   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14127     return V;
14128   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14129     return V;
14130   if (SDValue V = combineBinOpOfZExt(N, DAG))
14131     return V;
14132 
14133   // fold (add (select lhs, rhs, cc, 0, y), x) ->
14134   //      (select lhs, rhs, cc, x, (add x, y))
14135   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14136 }
14137 
14138 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
14139 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14140   SDValue N0 = N->getOperand(0);
14141   SDValue N1 = N->getOperand(1);
14142   EVT VT = N->getValueType(0);
14143   SDLoc DL(N);
14144 
14145   // Require a constant LHS.
14146   auto *N0C = dyn_cast<ConstantSDNode>(N0);
14147   if (!N0C)
14148     return SDValue();
14149 
14150   // All our optimizations involve subtracting 1 from the immediate and forming
14151   // an ADDI. Make sure the new immediate is valid for an ADDI.
14152   APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14153   if (!ImmValMinus1.isSignedIntN(12))
14154     return SDValue();
14155 
14156   SDValue NewLHS;
14157   if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14158     // (sub constant, (setcc x, y, eq/neq)) ->
14159     // (add (setcc x, y, neq/eq), constant - 1)
14160     ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14161     EVT SetCCOpVT = N1.getOperand(0).getValueType();
14162     if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14163       return SDValue();
14164     CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14165     NewLHS =
14166         DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14167   } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14168              N1.getOperand(0).getOpcode() == ISD::SETCC) {
14169     // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14170     // Since setcc returns a bool the xor is equivalent to 1-setcc.
14171     NewLHS = N1.getOperand(0);
14172   } else
14173     return SDValue();
14174 
14175   SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14176   return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14177 }
14178 
14179 // Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14180 // potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14181 // is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14182 // valid with Y=3, while 0b0000_1000_0000_0100 is not.
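      // Worked example (informal, 16-bit value for brevity): with Y=3 and
      // X = 0x0808, (shl X, 5) = 0x10100 and (srl X, 3) = 0x0101, so the sub
      // gives 0xFFFF, which is exactly orc.b of 0x0808 (every byte containing a
      // set bit becomes 0xFF, every zero byte stays 0x00).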
14183 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14184                                      const RISCVSubtarget &Subtarget) {
14185   if (!Subtarget.hasStdExtZbb())
14186     return SDValue();
14187 
14188   EVT VT = N->getValueType(0);
14189 
14190   if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14191     return SDValue();
14192 
14193   SDValue N0 = N->getOperand(0);
14194   SDValue N1 = N->getOperand(1);
14195 
14196   if (N0->getOpcode() != ISD::SHL)
14197     return SDValue();
14198 
14199   auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14200   if (!ShAmtCLeft)
14201     return SDValue();
14202   unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14203 
14204   if (ShiftedAmount >= 8)
14205     return SDValue();
14206 
14207   SDValue LeftShiftOperand = N0->getOperand(0);
14208   SDValue RightShiftOperand = N1;
14209 
14210   if (ShiftedAmount != 0) { // Right operand must be a right shift.
14211     if (N1->getOpcode() != ISD::SRL)
14212       return SDValue();
14213     auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14214     if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14215       return SDValue();
14216     RightShiftOperand = N1.getOperand(0);
14217   }
14218 
14219   // At least one shift should have a single use.
14220   if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14221     return SDValue();
14222 
14223   if (LeftShiftOperand != RightShiftOperand)
14224     return SDValue();
14225 
14226   APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14227   Mask <<= ShiftedAmount;
14228   // Check that X has indeed the right shape (only the Y-th bit can be set in
14229   // every byte).
14230   if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14231     return SDValue();
14232 
14233   return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14234 }
14235 
14236 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
14237                                  const RISCVSubtarget &Subtarget) {
14238   if (SDValue V = combineSubOfBoolean(N, DAG))
14239     return V;
14240 
14241   EVT VT = N->getValueType(0);
14242   SDValue N0 = N->getOperand(0);
14243   SDValue N1 = N->getOperand(1);
14244   // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14245   if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14246       isNullConstant(N1.getOperand(1))) {
14247     ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14248     if (CCVal == ISD::SETLT) {
14249       SDLoc DL(N);
14250       unsigned ShAmt = N0.getValueSizeInBits() - 1;
14251       return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14252                          DAG.getConstant(ShAmt, DL, VT));
14253     }
14254   }
14255 
14256   if (SDValue V = combineBinOpOfZExt(N, DAG))
14257     return V;
14258   if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14259     return V;
14260 
14261   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14262   //      (select lhs, rhs, cc, x, (sub x, y))
14263   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14264 }
14265 
14266 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14267 // Legalizing setcc can introduce xors like this. Doing this transform reduces
14268 // the number of xors and may allow the xor to fold into a branch condition.
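      // Informal check with X, Y in {0, 1}: (xor X, 1) is NOT X, so
      //   (and (xor X, 1), (xor Y, 1)) == (xor (or  X, Y), 1)
      //   (or  (xor X, 1), (xor Y, 1)) == (xor (and X, Y), 1)
      // which is the inverted-opcode-plus-xor form built below.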
14269 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14270   SDValue N0 = N->getOperand(0);
14271   SDValue N1 = N->getOperand(1);
14272   bool IsAnd = N->getOpcode() == ISD::AND;
14273 
14274   if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14275     return SDValue();
14276 
14277   if (!N0.hasOneUse() || !N1.hasOneUse())
14278     return SDValue();
14279 
14280   SDValue N01 = N0.getOperand(1);
14281   SDValue N11 = N1.getOperand(1);
14282 
14283   // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14284   // (xor X, -1) based on the upper bits of the other operand being 0. If the
14285   // operation is And, allow one of the Xors to use -1.
14286   if (isOneConstant(N01)) {
14287     if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14288       return SDValue();
14289   } else if (isOneConstant(N11)) {
14290     // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14291     if (!(IsAnd && isAllOnesConstant(N01)))
14292       return SDValue();
14293   } else
14294     return SDValue();
14295 
14296   EVT VT = N->getValueType(0);
14297 
14298   SDValue N00 = N0.getOperand(0);
14299   SDValue N10 = N1.getOperand(0);
14300 
14301   // The LHS of the xors needs to be 0/1.
14302   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14303   if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14304     return SDValue();
14305 
14306   // Invert the opcode and insert a new xor.
14307   SDLoc DL(N);
14308   unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14309   SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14310   return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14311 }
14312 
14313 // Fold (vXi8 (trunc (vselect (setltu X, 256), X, (sext (setgt X, 0))))) to
14314 // (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14315 // value to an unsigned value. This will be lowered to vmax and a series of
14316 // vnclipu instructions later. This can be extended to truncated types other
14317 // than i8 by replacing 256 and 255 with the equivalent constants for the
14318 // type.
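      // Informal spot check for the i16 -> i8 case: X = 300 selects the sext path
      // (setgt 300, 0 gives all-ones) and truncates to 0xFF, matching
      // smin(smax(300, 0), 255) == 255; X = -5 selects zero and matches
      // smin(smax(-5, 0), 255) == 0; X = 100 passes through unchanged either way.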
14319 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14320   EVT VT = N->getValueType(0);
14321   SDValue N0 = N->getOperand(0);
14322   EVT SrcVT = N0.getValueType();
14323 
14324   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14325   if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14326     return SDValue();
14327 
14328   if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14329     return SDValue();
14330 
14331   SDValue Cond = N0.getOperand(0);
14332   SDValue True = N0.getOperand(1);
14333   SDValue False = N0.getOperand(2);
14334 
14335   if (Cond.getOpcode() != ISD::SETCC)
14336     return SDValue();
14337 
14338   // FIXME: Support the version of this pattern with the select operands
14339   // swapped.
14340   ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14341   if (CCVal != ISD::SETULT)
14342     return SDValue();
14343 
14344   SDValue CondLHS = Cond.getOperand(0);
14345   SDValue CondRHS = Cond.getOperand(1);
14346 
14347   if (CondLHS != True)
14348     return SDValue();
14349 
14350   unsigned ScalarBits = VT.getScalarSizeInBits();
14351 
14352   // FIXME: Support other constants.
14353   ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14354   if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14355     return SDValue();
14356 
14357   if (False.getOpcode() != ISD::SIGN_EXTEND)
14358     return SDValue();
14359 
14360   False = False.getOperand(0);
14361 
14362   if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14363     return SDValue();
14364 
14365   ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14366   if (!FalseRHSC || !FalseRHSC->isZero())
14367     return SDValue();
14368 
14369   ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14370   if (CCVal2 != ISD::SETGT)
14371     return SDValue();
14372 
14373   // Emit the signed to unsigned saturation pattern.
14374   SDLoc DL(N);
14375   SDValue Max =
14376       DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14377   SDValue Min =
14378       DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14379                   DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14380   return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14381 }
14382 
14383 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
14384                                       const RISCVSubtarget &Subtarget) {
14385   SDValue N0 = N->getOperand(0);
14386   EVT VT = N->getValueType(0);
14387 
14388   // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14389   // extending X. This is safe since we only need the LSB after the shift and
14390   // shift amounts larger than 31 would produce poison. If we wait until
14391   // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14392   // to use a BEXT instruction.
14393   if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14394       N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14395       !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14396     SDLoc DL(N0);
14397     SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14398     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14399     SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14400     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14401   }
14402 
14403   return combineTruncSelectToSMaxUSat(N, DAG);
14404 }
14405 
14406 // Combines two comparison operations and a logic operation into one selection
14407 // operation (min, max) and a logic operation. Returns the newly constructed
14408 // node if the conditions for the optimization are satisfied.
14409 static SDValue performANDCombine(SDNode *N,
14410                                  TargetLowering::DAGCombinerInfo &DCI,
14411                                  const RISCVSubtarget &Subtarget) {
14412   SelectionDAG &DAG = DCI.DAG;
14413 
14414   SDValue N0 = N->getOperand(0);
14415   // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14416   // extending X. This is safe since we only need the LSB after the shift and
14417   // shift amounts larger than 31 would produce poison. If we wait until
14418   // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14419   // to use a BEXT instruction.
14420   if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14421       N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14422       N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14423       N0.hasOneUse()) {
14424     SDLoc DL(N);
14425     SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14426     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14427     SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14428     SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14429                               DAG.getConstant(1, DL, MVT::i64));
14430     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14431   }
14432 
14433   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14434     return V;
14435   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14436     return V;
14437 
14438   if (DCI.isAfterLegalizeDAG())
14439     if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14440       return V;
14441 
14442   // fold (and (select lhs, rhs, cc, -1, y), x) ->
14443   //      (select lhs, rhs, cc, x, (and x, y))
14444   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14445 }
14446 
14447 // Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14448 // FIXME: Generalize to other binary operators with same operand.
14449 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14450                                 SelectionDAG &DAG) {
14451   assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14452 
14453   if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14454       N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14455       !N0.hasOneUse() || !N1.hasOneUse())
14456     return SDValue();
14457 
14458   // Should have the same condition.
14459   SDValue Cond = N0.getOperand(1);
14460   if (Cond != N1.getOperand(1))
14461     return SDValue();
14462 
14463   SDValue TrueV = N0.getOperand(0);
14464   SDValue FalseV = N1.getOperand(0);
14465 
14466   if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14467       TrueV.getOperand(1) != FalseV.getOperand(1) ||
14468       !isOneConstant(TrueV.getOperand(1)) ||
14469       !TrueV.hasOneUse() || !FalseV.hasOneUse())
14470     return SDValue();
14471 
14472   EVT VT = N->getValueType(0);
14473   SDLoc DL(N);
14474 
14475   SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14476                               Cond);
14477   SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14478                               Cond);
14479   SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14480   return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14481 }
14482 
14483 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14484                                 const RISCVSubtarget &Subtarget) {
14485   SelectionDAG &DAG = DCI.DAG;
14486 
14487   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14488     return V;
14489   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14490     return V;
14491 
14492   if (DCI.isAfterLegalizeDAG())
14493     if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14494       return V;
14495 
14496   // Look for Or of CZERO_EQZ/NEZ with the same condition (the select idiom).
14497   // We may be able to pull a common operation out of the true and false value.
14498   SDValue N0 = N->getOperand(0);
14499   SDValue N1 = N->getOperand(1);
14500   if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14501     return V;
14502   if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14503     return V;
14504 
14505   // fold (or (select cond, 0, y), x) ->
14506   //      (select cond, x, (or x, y))
14507   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14508 }
14509 
14510 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
14511                                  const RISCVSubtarget &Subtarget) {
14512   SDValue N0 = N->getOperand(0);
14513   SDValue N1 = N->getOperand(1);
14514 
14515   // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14516   // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14517   // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14518   if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14519       N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14520       N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14521       !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14522     SDLoc DL(N);
14523     SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14524     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14525     SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14526     SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14527     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14528   }
14529 
14530   // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14531   // NOTE: Assumes ROL being legal means ROLW is legal.
14532   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14533   if (N0.getOpcode() == RISCVISD::SLLW &&
14534       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14535       TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14536     SDLoc DL(N);
14537     return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14538                        DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14539   }
14540 
14541   // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14542   if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14543     auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14544     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14545     if (ConstN00 && CC == ISD::SETLT) {
14546       EVT VT = N0.getValueType();
14547       SDLoc DL(N0);
14548       const APInt &Imm = ConstN00->getAPIntValue();
14549       if ((Imm + 1).isSignedIntN(12))
14550         return DAG.getSetCC(DL, VT, N0.getOperand(1),
14551                             DAG.getConstant(Imm + 1, DL, VT), CC);
14552     }
14553   }
14554 
14555   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14556     return V;
14557   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14558     return V;
14559 
14560   // fold (xor (select cond, 0, y), x) ->
14561   //      (select cond, x, (xor x, y))
14562   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14563 }
14564 
14565 // Try to expand a scalar multiply to a faster sequence.
14566 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14567                          TargetLowering::DAGCombinerInfo &DCI,
14568                          const RISCVSubtarget &Subtarget) {
14569 
14570   EVT VT = N->getValueType(0);
14571 
14572   // LI + MUL is usually smaller than the alternative sequence.
14573   if (DAG.getMachineFunction().getFunction().hasMinSize())
14574     return SDValue();
14575 
14576   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14577     return SDValue();
14578 
14579   if (VT != Subtarget.getXLenVT())
14580     return SDValue();
14581 
14582   const bool HasShlAdd =
14583       Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14584 
14585   ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14586   if (!CNode)
14587     return SDValue();
14588   uint64_t MulAmt = CNode->getZExtValue();
14589 
14590   // WARNING: The code below is knowingly incorrect with regard to undef semantics.
14591   // We're adding additional uses of X here, and in principle, we should be freezing
14592   // X before doing so.  However, adding freeze here causes real regressions, and no
14593   // other target properly freezes X in these cases either.
14594   SDValue X = N->getOperand(0);
14595 
14596   if (HasShlAdd) {
14597     for (uint64_t Divisor : {3, 5, 9}) {
14598       if (MulAmt % Divisor != 0)
14599         continue;
14600       uint64_t MulAmt2 = MulAmt / Divisor;
14601       // 3/5/9 * 2^N ->  shl (shXadd X, X), N
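            // e.g. (hypothetical constant) MulAmt = 40 = 5 * 8 becomes
            // sh2add t, X, X (t = 5*X) followed by slli t, t, 3 (t = 40*X),
            // with the shift/add order chosen by the zext check below.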
14602       if (isPowerOf2_64(MulAmt2)) {
14603         SDLoc DL(N);
14604         SDValue X = N->getOperand(0);
14605         // Put the shift first if we can fold a zext into the
14606         // shift forming a slli.uw.
14607         if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14608             X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14609           SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14610                                     DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14611           return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14612                              DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14613                              Shl);
14614         }
14615         // Otherwise, put the shl second so that it can fold with the following
14616         // instructions (e.g. sext or add).
14617         SDValue Mul359 =
14618             DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14619                         DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14620         return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14621                            DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14622       }
14623 
14624       // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
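            // e.g. (hypothetical constant) MulAmt = 45 = 5 * 9 becomes
            // sh2add t, X, X (t = 5*X) then sh3add r, t, t (r = 9*t = 45*X).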
14625       if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14626         SDLoc DL(N);
14627         SDValue Mul359 =
14628             DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14629                         DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14630         return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14631                            DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14632                            Mul359);
14633       }
14634     }
14635 
14636     // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
14637     // shXadd. First check if this is a sum of two powers of 2 because that's
14638     // easy. Then count the trailing zeros to find the 2/4/8 term.
14639     if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14640       unsigned ScaleShift = llvm::countr_zero(MulAmt);
14641       if (ScaleShift >= 1 && ScaleShift < 4) {
14642         unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14643         SDLoc DL(N);
14644         SDValue Shift1 =
14645             DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14646         return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14647                            DAG.getConstant(ScaleShift, DL, VT), Shift1);
14648       }
14649     }
14650 
14651     // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14652     // This is the two-instruction form; there are also three-instruction
14653     // variants we could implement, e.g.:
14654     //   (2^(1,2,3) * 3,5,9 + 1) << C2
14655     //   2^(C1>3) * 3,5,9 +/- 1
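          // e.g. (hypothetical constant) MulAmt = 11 = 2 * 5 + 1: C = 10 has
          // one trailing zero and C >> 1 == 5, so this emits
          // sh2add t, X, X (t = 5*X) then sh1add r, t, X (r = 11*X).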
14656     for (uint64_t Divisor : {3, 5, 9}) {
14657       uint64_t C = MulAmt - 1;
14658       if (C <= Divisor)
14659         continue;
14660       unsigned TZ = llvm::countr_zero(C);
14661       if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14662         SDLoc DL(N);
14663         SDValue Mul359 =
14664             DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14665                         DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14666         return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14667                            DAG.getConstant(TZ, DL, VT), X);
14668       }
14669     }
14670 
14671     // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14672     if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14673       unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14674       if (ScaleShift >= 1 && ScaleShift < 4) {
14675         unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14676         SDLoc DL(N);
14677         SDValue Shift1 =
14678             DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14679         return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14680                            DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14681                                        DAG.getConstant(ScaleShift, DL, VT), X));
14682       }
14683     }
14684 
14685     // 2^N - 3/5/9 -> (sub (shl X, C1), (shXadd X, X))
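          // e.g. (hypothetical constant) MulAmt = 59: 59 + 5 = 64 is a power
          // of two, so this emits slli t, X, 6 (t = 64*X) and
          // sh2add u, X, X (u = 5*X), then sub t, t, u (t = 59*X).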
14686     for (uint64_t Offset : {3, 5, 9}) {
14687       if (isPowerOf2_64(MulAmt + Offset)) {
14688         SDLoc DL(N);
14689         SDValue Shift1 =
14690             DAG.getNode(ISD::SHL, DL, VT, X,
14691                         DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14692         SDValue Mul359 =
14693             DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14694                         DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14695         return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14696       }
14697     }
14698   }
14699 
14700   // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
14701   uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14702   if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14703     uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14704     SDLoc DL(N);
14705     SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14706                                  DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14707     SDValue Shift2 =
14708         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14709                     DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14710     return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14711   }
14712 
14713   if (HasShlAdd) {
14714     for (uint64_t Divisor : {3, 5, 9}) {
14715       if (MulAmt % Divisor != 0)
14716         continue;
14717       uint64_t MulAmt2 = MulAmt / Divisor;
14718       // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14719       // of 25 which happen to be quite common.
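            // e.g. (hypothetical constant) MulAmt = 100 = 5 * 5 * 4 becomes
            // sh2add t, X, X (t = 5*X), sh2add u, t, t (u = 25*X), then
            // slli u, u, 2 (u = 100*X).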
14720       for (uint64_t Divisor2 : {3, 5, 9}) {
14721         if (MulAmt2 % Divisor2 != 0)
14722           continue;
14723         uint64_t MulAmt3 = MulAmt2 / Divisor2;
14724         if (isPowerOf2_64(MulAmt3)) {
14725           SDLoc DL(N);
14726           SDValue Mul359A =
14727               DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14728                           DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14729           SDValue Mul359B = DAG.getNode(
14730               RISCVISD::SHL_ADD, DL, VT, Mul359A,
14731               DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14732           return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14733                              DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14734         }
14735       }
14736     }
14737   }
14738 
14739   return SDValue();
14740 }
14741 
14742 // Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14743 // (bitcast (sra (v2Xi16 (bitcast X)), 15))
14744 // Same for other equivalent types with other equivalent constants.
14745 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
14746   EVT VT = N->getValueType(0);
14747   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14748 
14749   // Do this for legal vectors unless they are i1 or i8 vectors.
14750   if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14751     return SDValue();
14752 
14753   if (N->getOperand(0).getOpcode() != ISD::AND ||
14754       N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14755     return SDValue();
14756 
14757   SDValue And = N->getOperand(0);
14758   SDValue Srl = And.getOperand(0);
14759 
14760   APInt V1, V2, V3;
14761   if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14762       !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14763       !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14764     return SDValue();
14765 
14766   unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14767   if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14768       V3 != (HalfSize - 1))
14769     return SDValue();
14770 
14771   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14772                                 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14773                                 VT.getVectorElementCount() * 2);
14774   SDLoc DL(N);
14775   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14776   SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14777                             DAG.getConstant(HalfSize - 1, DL, HalfVT));
14778   return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14779 }
14780 
14781 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14782                                  TargetLowering::DAGCombinerInfo &DCI,
14783                                  const RISCVSubtarget &Subtarget) {
14784   EVT VT = N->getValueType(0);
14785   if (!VT.isVector())
14786     return expandMul(N, DAG, DCI, Subtarget);
14787 
14788   SDLoc DL(N);
14789   SDValue N0 = N->getOperand(0);
14790   SDValue N1 = N->getOperand(1);
14791   SDValue MulOper;
14792   unsigned AddSubOpc;
14793 
14794   // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14795   //        (mul x, (add y, 1)) -> (add x, (mul x, y))
14796   // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14797   //         (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14798   auto IsAddSubWith1 = [&](SDValue V) -> bool {
14799     AddSubOpc = V->getOpcode();
14800     if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14801       SDValue Opnd = V->getOperand(1);
14802       MulOper = V->getOperand(0);
14803       if (AddSubOpc == ISD::SUB)
14804         std::swap(Opnd, MulOper);
14805       if (isOneOrOneSplat(Opnd))
14806         return true;
14807     }
14808     return false;
14809   };
14810 
14811   if (IsAddSubWith1(N0)) {
14812     SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14813     return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14814   }
14815 
14816   if (IsAddSubWith1(N1)) {
14817     SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14818     return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14819   }
14820 
14821   if (SDValue V = combineBinOpOfZExt(N, DAG))
14822     return V;
14823 
14824   if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
14825     return V;
14826 
14827   return SDValue();
14828 }
14829 
14830 /// According to the property that indexed load/store instructions zero-extend
14831 /// their indices, try to narrow the type of the index operand.
14832 static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14833   if (isIndexTypeSigned(IndexType))
14834     return false;
14835 
14836   if (!N->hasOneUse())
14837     return false;
14838 
14839   EVT VT = N.getValueType();
14840   SDLoc DL(N);
14841 
14842   // In general, what we're doing here is seeing if we can sink a truncate to
14843   // a smaller element type into the expression tree building our index.
14844   // TODO: We can generalize this and handle a bunch more cases if useful.
14845 
14846   // Narrow a buildvector to the narrowest element type.  This requires less
14847   // work and less register pressure at high LMUL, and creates smaller constants
14848   // which may be cheaper to materialize.
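        // For example, a constant index vector whose elements all fit in 8 bits
        // can be truncated here to an i8-element vector.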
14849   if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14850     KnownBits Known = DAG.computeKnownBits(N);
14851     unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14852     LLVMContext &C = *DAG.getContext();
14853     EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14854     if (ResultVT.bitsLT(VT.getVectorElementType())) {
14855       N = DAG.getNode(ISD::TRUNCATE, DL,
14856                       VT.changeVectorElementType(ResultVT), N);
14857       return true;
14858     }
14859   }
14860 
14861   // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
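        // E.g., with bits(x) == 8 and C == 2 the result needs at most 10 bits,
        // so narrowing to i16 elements is sufficient.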
14862   if (N.getOpcode() != ISD::SHL)
14863     return false;
14864 
14865   SDValue N0 = N.getOperand(0);
14866   if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14867       N0.getOpcode() != RISCVISD::VZEXT_VL)
14868     return false;
14869   if (!N0->hasOneUse())
14870     return false;
14871 
14872   APInt ShAmt;
14873   SDValue N1 = N.getOperand(1);
14874   if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14875     return false;
14876 
14877   SDValue Src = N0.getOperand(0);
14878   EVT SrcVT = Src.getValueType();
14879   unsigned SrcElen = SrcVT.getScalarSizeInBits();
14880   unsigned ShAmtV = ShAmt.getZExtValue();
14881   unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14882   NewElen = std::max(NewElen, 8U);
14883 
14884   // Skip if NewElen is not narrower than the original extended type.
14885   if (NewElen >= N0.getValueType().getScalarSizeInBits())
14886     return false;
14887 
14888   EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14889   EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14890 
14891   SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14892   SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14893   N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14894   return true;
14895 }
14896 
14897 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14898 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14899 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14900 // can become a sext.w instead of a shift pair.
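      // For example, (seteq (and X, 0xffffffff), 0xfffff800) becomes
      // (seteq (sext_inreg X, i32), -2048): -2048 fits in a simm12 immediate,
      // while 0xfffff800 would need extra instructions to materialize.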
14901 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14902                                    const RISCVSubtarget &Subtarget) {
14903   SDValue N0 = N->getOperand(0);
14904   SDValue N1 = N->getOperand(1);
14905   EVT VT = N->getValueType(0);
14906   EVT OpVT = N0.getValueType();
14907 
14908   if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14909     return SDValue();
14910 
14911   // RHS needs to be a constant.
14912   auto *N1C = dyn_cast<ConstantSDNode>(N1);
14913   if (!N1C)
14914     return SDValue();
14915 
14916   // LHS needs to be (and X, 0xffffffff).
14917   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14918       !isa<ConstantSDNode>(N0.getOperand(1)) ||
14919       N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14920     return SDValue();
14921 
14922   // Looking for an equality compare.
14923   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14924   if (!isIntEqualitySetCC(Cond))
14925     return SDValue();
14926 
14927   // Don't do this if the sign bit is provably zero, it will be turned back into
14928   // an AND.
14929   APInt SignMask = APInt::getOneBitSet(64, 31);
14930   if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14931     return SDValue();
14932 
14933   const APInt &C1 = N1C->getAPIntValue();
14934 
14935   SDLoc dl(N);
14936   // If the constant is larger than 2^32 - 1 it is impossible for both sides
14937   // to be equal.
14938   if (C1.getActiveBits() > 32)
14939     return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14940 
14941   SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT,
14942                                N0.getOperand(0), DAG.getValueType(MVT::i32));
14943   return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14944                                                       dl, OpVT), Cond);
14945 }
14946 
14947 static SDValue
14948 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14949                                 const RISCVSubtarget &Subtarget) {
14950   SDValue Src = N->getOperand(0);
14951   EVT VT = N->getValueType(0);
14952   EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14953   unsigned Opc = Src.getOpcode();
14954 
14955   // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14956   // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14957   if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14958       Subtarget.hasStdExtZfhmin())
14959     return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14960                        Src.getOperand(0));
14961 
14962   // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14963   if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14964       VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14965       DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14966     return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14967                        Src.getOperand(1));
14968 
14969   return SDValue();
14970 }
14971 
14972 namespace {
14973 // Forward declaration of the structure holding the necessary information to
14974 // apply a combine.
14975 struct CombineResult;
14976 
14977 enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14978 /// Helper class for folding sign/zero extensions.
14979 /// In particular, this class is used for the following combines:
14980 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14981 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14982 /// mul | mul_vl -> vwmul(u) | vwmul_su
14983 /// shl | shl_vl -> vwsll
14984 /// fadd -> vfwadd | vfwadd_w
14985 /// fsub -> vfwsub | vfwsub_w
14986 /// fmul -> vfwmul
14987 /// An object of this class represents an operand of the operation we want to
14988 /// combine.
14989 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14990 /// NodeExtensionHelper for `a` and one for `b`.
14991 ///
14992 /// This class abstracts away how the extension is materialized and
14993 /// how its number of users affect the combines.
14994 ///
14995 /// In particular:
14996 /// - VWADD_W is conceptually == add(op0, sext(op1))
14997 /// - VWADDU_W == add(op0, zext(op1))
14998 /// - VWSUB_W == sub(op0, sext(op1))
14999 /// - VWSUBU_W == sub(op0, zext(op1))
15000 /// - VFWADD_W == fadd(op0, fpext(op1))
15001 /// - VFWSUB_W == fsub(op0, fpext(op1))
15002 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
15003 /// zext|sext(smaller_value).
15004 struct NodeExtensionHelper {
15005   /// Records if this operand is like being zero extended.
15006   bool SupportsZExt;
15007   /// Records if this operand is like being sign extended.
15008   /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
15009   /// instance, a splat constant (e.g., 3), would support being both sign and
15010   /// zero extended.
15011   bool SupportsSExt;
15012   /// Records if this operand is like being floating-point extended.
15013   bool SupportsFPExt;
15014   /// This boolean captures whether we care if this operand would still be
15015   /// around after the folding happens.
15016   bool EnforceOneUse;
15017   /// Original value that this NodeExtensionHelper represents.
15018   SDValue OrigOperand;
15019 
15020   /// Get the value feeding the extension or the value itself.
15021   /// E.g., for zext(a), this would return a.
15022   SDValue getSource() const {
15023     switch (OrigOperand.getOpcode()) {
15024     case ISD::ZERO_EXTEND:
15025     case ISD::SIGN_EXTEND:
15026     case RISCVISD::VSEXT_VL:
15027     case RISCVISD::VZEXT_VL:
15028     case RISCVISD::FP_EXTEND_VL:
15029       return OrigOperand.getOperand(0);
15030     default:
15031       return OrigOperand;
15032     }
15033   }
15034 
15035   /// Check if this instance represents a splat.
15036   bool isSplat() const {
15037     return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
15038            OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
15039   }
15040 
15041   /// Get the extended opcode.
15042   unsigned getExtOpc(ExtKind SupportsExt) const {
15043     switch (SupportsExt) {
15044     case ExtKind::SExt:
15045       return RISCVISD::VSEXT_VL;
15046     case ExtKind::ZExt:
15047       return RISCVISD::VZEXT_VL;
15048     case ExtKind::FPExt:
15049       return RISCVISD::FP_EXTEND_VL;
15050     }
15051     llvm_unreachable("Unknown ExtKind enum");
15052   }
15053 
15054   /// Get or create a value that can feed \p Root with the given extension
15055   /// \p SupportsExt. If \p SupportsExt is std::nullopt, this returns the
15056   /// source of this operand. \see ::getSource().
15057   SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
15058                                 const RISCVSubtarget &Subtarget,
15059                                 std::optional<ExtKind> SupportsExt) const {
15060     if (!SupportsExt.has_value())
15061       return OrigOperand;
15062 
15063     MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15064 
15065     SDValue Source = getSource();
15066     assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15067     if (Source.getValueType() == NarrowVT)
15068       return Source;
15069 
15070     // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15071     if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15072       assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15073              Root->getOpcode() == RISCVISD::VFMADD_VL);
15074       return Source;
15075     }
15076 
15077     unsigned ExtOpc = getExtOpc(*SupportsExt);
15078 
15079     // If we need an extension, we should be changing the type.
15080     SDLoc DL(OrigOperand);
15081     auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15082     switch (OrigOperand.getOpcode()) {
15083     case ISD::ZERO_EXTEND:
15084     case ISD::SIGN_EXTEND:
15085     case RISCVISD::VSEXT_VL:
15086     case RISCVISD::VZEXT_VL:
15087     case RISCVISD::FP_EXTEND_VL:
15088       return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15089     case ISD::SPLAT_VECTOR:
15090       return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15091     case RISCVISD::VMV_V_X_VL:
15092       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15093                          DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15094     case RISCVISD::VFMV_V_F_VL:
15095       Source = Source.getOperand(1);
15096       assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15097       Source = Source.getOperand(0);
15098       assert(Source.getValueType() == NarrowVT.getVectorElementType());
15099       return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15100                          DAG.getUNDEF(NarrowVT), Source, VL);
15101     default:
15102       // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15103       // and that operand should already have the right NarrowVT so no
15104       // extension should be required at this point.
15105       llvm_unreachable("Unsupported opcode");
15106     }
15107   }
15108 
15109   /// Helper function to get the narrow type for \p Root.
15110   /// The narrow type is the type of \p Root where we divided the size of each
15111   /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15112   /// \pre Both the narrow type and the original type should be legal.
15113   static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15114     MVT VT = Root->getSimpleValueType(0);
15115 
15116     // Determine the narrow size.
15117     unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15118 
15119     MVT EltVT = SupportsExt == ExtKind::FPExt
15120                     ? MVT::getFloatingPointVT(NarrowSize)
15121                     : MVT::getIntegerVT(NarrowSize);
15122 
15123     assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15124            "Trying to extend something we can't represent");
15125     MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15126     return NarrowVT;
15127   }
15128 
15129   /// Get the opcode to materialize:
15130   /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15131   static unsigned getSExtOpcode(unsigned Opcode) {
15132     switch (Opcode) {
15133     case ISD::ADD:
15134     case RISCVISD::ADD_VL:
15135     case RISCVISD::VWADD_W_VL:
15136     case RISCVISD::VWADDU_W_VL:
15137     case ISD::OR:
15138       return RISCVISD::VWADD_VL;
15139     case ISD::SUB:
15140     case RISCVISD::SUB_VL:
15141     case RISCVISD::VWSUB_W_VL:
15142     case RISCVISD::VWSUBU_W_VL:
15143       return RISCVISD::VWSUB_VL;
15144     case ISD::MUL:
15145     case RISCVISD::MUL_VL:
15146       return RISCVISD::VWMUL_VL;
15147     default:
15148       llvm_unreachable("Unexpected opcode");
15149     }
15150   }
15151 
15152   /// Get the opcode to materialize:
15153   /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15154   static unsigned getZExtOpcode(unsigned Opcode) {
15155     switch (Opcode) {
15156     case ISD::ADD:
15157     case RISCVISD::ADD_VL:
15158     case RISCVISD::VWADD_W_VL:
15159     case RISCVISD::VWADDU_W_VL:
15160     case ISD::OR:
15161       return RISCVISD::VWADDU_VL;
15162     case ISD::SUB:
15163     case RISCVISD::SUB_VL:
15164     case RISCVISD::VWSUB_W_VL:
15165     case RISCVISD::VWSUBU_W_VL:
15166       return RISCVISD::VWSUBU_VL;
15167     case ISD::MUL:
15168     case RISCVISD::MUL_VL:
15169       return RISCVISD::VWMULU_VL;
15170     case ISD::SHL:
15171     case RISCVISD::SHL_VL:
15172       return RISCVISD::VWSLL_VL;
15173     default:
15174       llvm_unreachable("Unexpected opcode");
15175     }
15176   }
15177 
15178   /// Get the opcode to materialize:
15179   /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15180   static unsigned getFPExtOpcode(unsigned Opcode) {
15181     switch (Opcode) {
15182     case RISCVISD::FADD_VL:
15183     case RISCVISD::VFWADD_W_VL:
15184       return RISCVISD::VFWADD_VL;
15185     case RISCVISD::FSUB_VL:
15186     case RISCVISD::VFWSUB_W_VL:
15187       return RISCVISD::VFWSUB_VL;
15188     case RISCVISD::FMUL_VL:
15189       return RISCVISD::VFWMUL_VL;
15190     case RISCVISD::VFMADD_VL:
15191       return RISCVISD::VFWMADD_VL;
15192     case RISCVISD::VFMSUB_VL:
15193       return RISCVISD::VFWMSUB_VL;
15194     case RISCVISD::VFNMADD_VL:
15195       return RISCVISD::VFWNMADD_VL;
15196     case RISCVISD::VFNMSUB_VL:
15197       return RISCVISD::VFWNMSUB_VL;
15198     default:
15199       llvm_unreachable("Unexpected opcode");
15200     }
15201   }
15202 
15203   /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15204   /// newOpcode(a, b).
15205   static unsigned getSUOpcode(unsigned Opcode) {
15206     assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15207            "SU is only supported for MUL");
15208     return RISCVISD::VWMULSU_VL;
15209   }
15210 
15211   /// Get the opcode to materialize
15212   /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15213   static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15214     switch (Opcode) {
15215     case ISD::ADD:
15216     case RISCVISD::ADD_VL:
15217     case ISD::OR:
15218       return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15219                                           : RISCVISD::VWADDU_W_VL;
15220     case ISD::SUB:
15221     case RISCVISD::SUB_VL:
15222       return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15223                                           : RISCVISD::VWSUBU_W_VL;
15224     case RISCVISD::FADD_VL:
15225       return RISCVISD::VFWADD_W_VL;
15226     case RISCVISD::FSUB_VL:
15227       return RISCVISD::VFWSUB_W_VL;
15228     default:
15229       llvm_unreachable("Unexpected opcode");
15230     }
15231   }
15232 
15233   using CombineToTry = std::function<std::optional<CombineResult>(
15234       SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15235       const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15236       const RISCVSubtarget &)>;
15237 
15238   /// Check if this node needs to be fully folded or extended for all users.
15239   bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15240 
15241   void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15242                                       const RISCVSubtarget &Subtarget) {
15243     unsigned Opc = OrigOperand.getOpcode();
15244     MVT VT = OrigOperand.getSimpleValueType();
15245 
15246     assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15247            "Unexpected Opcode");
15248 
15249     // The passthru must be undef for tail agnostic.
15250     if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15251       return;
15252 
15253     // Get the scalar value.
15254     SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15255                                           : OrigOperand.getOperand(1);
15256 
15257     // See if we have enough sign bits or zero bits in the scalar to use a
15258     // widening opcode by splatting to smaller element size.
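          // E.g., a splat of 42 in an i32-element vector can also be treated as
          // the sign or zero extension of an i16 (or i8) splat of 42.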
15259     unsigned EltBits = VT.getScalarSizeInBits();
15260     unsigned ScalarBits = Op.getValueSizeInBits();
15261     // If we're not getting all bits from the element, we need special handling.
15262     if (ScalarBits < EltBits) {
15263       // This should only occur on RV32.
15264       assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15265              !Subtarget.is64Bit() && "Unexpected splat");
15266       // vmv.v.x sign extends narrow inputs.
15267       SupportsSExt = true;
15268 
15269       // If the input is positive, then sign extend is also zero extend.
15270       if (DAG.SignBitIsZero(Op))
15271         SupportsZExt = true;
15272 
15273       EnforceOneUse = false;
15274       return;
15275     }
15276 
15277     unsigned NarrowSize = EltBits / 2;
15278     // If the narrow type cannot be expressed with a legal VMV,
15279     // this is not a valid candidate.
15280     if (NarrowSize < 8)
15281       return;
15282 
15283     if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15284       SupportsSExt = true;
15285 
15286     if (DAG.MaskedValueIsZero(Op,
15287                               APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15288       SupportsZExt = true;
15289 
15290     EnforceOneUse = false;
15291   }
15292 
15293   bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15294                            const RISCVSubtarget &Subtarget) {
15295     // Any f16 extension will need zvfh
15296     if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15297       return false;
15298     // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15299     // zvfbfwma
15300     if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15301                                      Root->getOpcode() != RISCVISD::VFMADD_VL))
15302       return false;
15303     return true;
15304   }
15305 
15306   /// Helper method to set the various fields of this struct based on the
15307   /// type of \p Root.
15308   void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15309                               const RISCVSubtarget &Subtarget) {
15310     SupportsZExt = false;
15311     SupportsSExt = false;
15312     SupportsFPExt = false;
15313     EnforceOneUse = true;
15314     unsigned Opc = OrigOperand.getOpcode();
15315     // For the nodes we handle below, we end up using their inputs directly: see
15316     // getSource(). However, since they either don't have a passthru or we check
15317     // that their passthru is undef, we can safely ignore their mask and VL.
15318     switch (Opc) {
15319     case ISD::ZERO_EXTEND:
15320     case ISD::SIGN_EXTEND: {
15321       MVT VT = OrigOperand.getSimpleValueType();
15322       if (!VT.isVector())
15323         break;
15324 
15325       SDValue NarrowElt = OrigOperand.getOperand(0);
15326       MVT NarrowVT = NarrowElt.getSimpleValueType();
15327       // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15328       if (NarrowVT.getVectorElementType() == MVT::i1)
15329         break;
15330 
15331       SupportsZExt = Opc == ISD::ZERO_EXTEND;
15332       SupportsSExt = Opc == ISD::SIGN_EXTEND;
15333       break;
15334     }
15335     case RISCVISD::VZEXT_VL:
15336       SupportsZExt = true;
15337       break;
15338     case RISCVISD::VSEXT_VL:
15339       SupportsSExt = true;
15340       break;
15341     case RISCVISD::FP_EXTEND_VL: {
15342       MVT NarrowEltVT =
15343           OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
15344       if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15345         break;
15346       SupportsFPExt = true;
15347       break;
15348     }
15349     case ISD::SPLAT_VECTOR:
15350     case RISCVISD::VMV_V_X_VL:
15351       fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15352       break;
15353     case RISCVISD::VFMV_V_F_VL: {
15354       MVT VT = OrigOperand.getSimpleValueType();
15355 
15356       if (!OrigOperand.getOperand(0).isUndef())
15357         break;
15358 
15359       SDValue Op = OrigOperand.getOperand(1);
15360       if (Op.getOpcode() != ISD::FP_EXTEND)
15361         break;
15362 
15363       if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15364                                Subtarget))
15365         break;
15366 
15367       unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15368       unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15369       if (NarrowSize != ScalarBits)
15370         break;
15371 
15372       SupportsFPExt = true;
15373       break;
15374     }
15375     default:
15376       break;
15377     }
15378   }
15379 
15380   /// Check if \p Root supports any extension folding combines.
15381   static bool isSupportedRoot(const SDNode *Root,
15382                               const RISCVSubtarget &Subtarget) {
15383     switch (Root->getOpcode()) {
15384     case ISD::ADD:
15385     case ISD::SUB:
15386     case ISD::MUL: {
15387       return Root->getValueType(0).isScalableVector();
15388     }
15389     case ISD::OR: {
15390       return Root->getValueType(0).isScalableVector() &&
15391              Root->getFlags().hasDisjoint();
15392     }
15393     // Vector Widening Integer Add/Sub/Mul Instructions
15394     case RISCVISD::ADD_VL:
15395     case RISCVISD::MUL_VL:
15396     case RISCVISD::VWADD_W_VL:
15397     case RISCVISD::VWADDU_W_VL:
15398     case RISCVISD::SUB_VL:
15399     case RISCVISD::VWSUB_W_VL:
15400     case RISCVISD::VWSUBU_W_VL:
15401     // Vector Widening Floating-Point Add/Sub/Mul Instructions
15402     case RISCVISD::FADD_VL:
15403     case RISCVISD::FSUB_VL:
15404     case RISCVISD::FMUL_VL:
15405     case RISCVISD::VFWADD_W_VL:
15406     case RISCVISD::VFWSUB_W_VL:
15407       return true;
15408     case ISD::SHL:
15409       return Root->getValueType(0).isScalableVector() &&
15410              Subtarget.hasStdExtZvbb();
15411     case RISCVISD::SHL_VL:
15412       return Subtarget.hasStdExtZvbb();
15413     case RISCVISD::VFMADD_VL:
15414     case RISCVISD::VFNMSUB_VL:
15415     case RISCVISD::VFNMADD_VL:
15416     case RISCVISD::VFMSUB_VL:
15417       return true;
15418     default:
15419       return false;
15420     }
15421   }
15422 
15423   /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15424   NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15425                       const RISCVSubtarget &Subtarget) {
15426     assert(isSupportedRoot(Root, Subtarget) &&
15427            "Trying to build an helper with an "
15428            "unsupported root");
15429     assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15430     assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
15431     OrigOperand = Root->getOperand(OperandIdx);
15432 
15433     unsigned Opc = Root->getOpcode();
15434     switch (Opc) {
15435     // We consider
15436     // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15437     // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15438     // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15439     case RISCVISD::VWADD_W_VL:
15440     case RISCVISD::VWADDU_W_VL:
15441     case RISCVISD::VWSUB_W_VL:
15442     case RISCVISD::VWSUBU_W_VL:
15443     case RISCVISD::VFWADD_W_VL:
15444     case RISCVISD::VFWSUB_W_VL:
15445       if (OperandIdx == 1) {
15446         SupportsZExt =
15447             Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
15448         SupportsSExt =
15449             Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
15450         SupportsFPExt =
15451             Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
15452         // There's no existing extension here, so we don't have to worry about
15453         // making sure it gets removed.
15454         EnforceOneUse = false;
15455         break;
15456       }
15457       [[fallthrough]];
15458     default:
15459       fillUpExtensionSupport(Root, DAG, Subtarget);
15460       break;
15461     }
15462   }
15463 
15464   /// Helper function to get the Mask and VL from \p Root.
15465   static std::pair<SDValue, SDValue>
15466   getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15467                const RISCVSubtarget &Subtarget) {
15468     assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15469     switch (Root->getOpcode()) {
15470     case ISD::ADD:
15471     case ISD::SUB:
15472     case ISD::MUL:
15473     case ISD::OR:
15474     case ISD::SHL: {
15475       SDLoc DL(Root);
15476       MVT VT = Root->getSimpleValueType(0);
15477       return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15478     }
15479     default:
15480       return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15481     }
15482   }
15483 
15484   /// Helper function to check if \p N is commutative with respect to the
15485   /// foldings that are supported by this class.
15486   static bool isCommutative(const SDNode *N) {
15487     switch (N->getOpcode()) {
15488     case ISD::ADD:
15489     case ISD::MUL:
15490     case ISD::OR:
15491     case RISCVISD::ADD_VL:
15492     case RISCVISD::MUL_VL:
15493     case RISCVISD::VWADD_W_VL:
15494     case RISCVISD::VWADDU_W_VL:
15495     case RISCVISD::FADD_VL:
15496     case RISCVISD::FMUL_VL:
15497     case RISCVISD::VFWADD_W_VL:
15498     case RISCVISD::VFMADD_VL:
15499     case RISCVISD::VFNMSUB_VL:
15500     case RISCVISD::VFNMADD_VL:
15501     case RISCVISD::VFMSUB_VL:
15502       return true;
15503     case ISD::SUB:
15504     case RISCVISD::SUB_VL:
15505     case RISCVISD::VWSUB_W_VL:
15506     case RISCVISD::VWSUBU_W_VL:
15507     case RISCVISD::FSUB_VL:
15508     case RISCVISD::VFWSUB_W_VL:
15509     case ISD::SHL:
15510     case RISCVISD::SHL_VL:
15511       return false;
15512     default:
15513       llvm_unreachable("Unexpected opcode");
15514     }
15515   }
15516 
15517   /// Get a list of combine to try for folding extensions in \p Root.
15518   /// Note that each returned CombineToTry function doesn't actually modify
15519   /// anything. Instead, it produces an optional CombineResult that, if not
15520   /// std::nullopt, needs to be materialized for the combine to be applied.
15521   /// \see CombineResult::materialize.
15522   /// If the related CombineToTry function returns std::nullopt, that means the
15523   /// combine didn't match.
15524   static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15525 };
15526 
15527 /// Helper structure that holds all the necessary information to materialize a
15528 /// combine that does some extension folding.
15529 struct CombineResult {
15530   /// Opcode to be generated when materializing the combine.
15531   unsigned TargetOpcode;
15532   // No value means no extension is needed.
15533   std::optional<ExtKind> LHSExt;
15534   std::optional<ExtKind> RHSExt;
15535   /// Root of the combine.
15536   SDNode *Root;
15537   /// LHS of the TargetOpcode.
15538   NodeExtensionHelper LHS;
15539   /// RHS of the TargetOpcode.
15540   NodeExtensionHelper RHS;
15541 
15542   CombineResult(unsigned TargetOpcode, SDNode *Root,
15543                 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15544                 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15545       : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15546         LHS(LHS), RHS(RHS) {}
15547 
15548   /// Return a value that uses TargetOpcode and that can be used to replace
15549   /// Root.
15550   /// The actual replacement is *not* done in that method.
15551   SDValue materialize(SelectionDAG &DAG,
15552                       const RISCVSubtarget &Subtarget) const {
15553     SDValue Mask, VL, Passthru;
15554     std::tie(Mask, VL) =
15555         NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15556     switch (Root->getOpcode()) {
15557     default:
15558       Passthru = Root->getOperand(2);
15559       break;
15560     case ISD::ADD:
15561     case ISD::SUB:
15562     case ISD::MUL:
15563     case ISD::OR:
15564     case ISD::SHL:
15565       Passthru = DAG.getUNDEF(Root->getValueType(0));
15566       break;
15567     }
15568     return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15569                        LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15570                        RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15571                        Passthru, Mask, VL);
15572   }
15573 };
15574 
15575 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15576 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15577 /// are zext) and LHS and RHS can be folded into Root.
15578 /// AllowExtMask defines which form `ext` can take in this pattern.
15579 ///
15580 /// \note If the pattern can match with both zext and sext, the returned
15581 /// CombineResult will feature the zext result.
15582 ///
15583 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15584 /// can be used to apply the pattern.
15585 static std::optional<CombineResult>
15586 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15587                                  const NodeExtensionHelper &RHS,
15588                                  uint8_t AllowExtMask, SelectionDAG &DAG,
15589                                  const RISCVSubtarget &Subtarget) {
15590   if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15591     return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15592                          Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15593                          /*RHSExt=*/{ExtKind::ZExt});
15594   if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15595     return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15596                          Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15597                          /*RHSExt=*/{ExtKind::SExt});
15598   if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15599     return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15600                          Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15601                          /*RHSExt=*/{ExtKind::FPExt});
15602   return std::nullopt;
15603 }
15604 
15605 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15606 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15607 /// are zext) and LHS and RHS can be folded into Root.
15608 ///
15609 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15610 /// can be used to apply the pattern.
15611 static std::optional<CombineResult>
15612 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15613                              const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15614                              const RISCVSubtarget &Subtarget) {
15615   return canFoldToVWWithSameExtensionImpl(
15616       Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15617       Subtarget);
15618 }
15619 
15620 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15621 ///
15622 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15623 /// can be used to apply the pattern.
15624 static std::optional<CombineResult>
15625 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15626               const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15627               const RISCVSubtarget &Subtarget) {
15628   if (RHS.SupportsFPExt)
15629     return CombineResult(
15630         NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15631         Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15632 
15633   // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15634   // sext/zext?
15635   // Control this behavior behind an option (AllowSplatInVW_W) for testing
15636   // purposes.
15637   if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15638     return CombineResult(
15639         NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15640         LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15641   if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15642     return CombineResult(
15643         NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15644         LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15645   return std::nullopt;
15646 }
15647 
15648 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15649 ///
15650 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15651 /// can be used to apply the pattern.
15652 static std::optional<CombineResult>
15653 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15654                     const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15655                     const RISCVSubtarget &Subtarget) {
15656   return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15657                                           Subtarget);
15658 }
15659 
15660 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15661 ///
15662 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15663 /// can be used to apply the pattern.
15664 static std::optional<CombineResult>
15665 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15666                     const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15667                     const RISCVSubtarget &Subtarget) {
15668   return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15669                                           Subtarget);
15670 }
15671 
15672 /// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15673 ///
15674 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15675 /// can be used to apply the pattern.
15676 static std::optional<CombineResult>
15677 canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15678                      const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15679                      const RISCVSubtarget &Subtarget) {
15680   return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15681                                           Subtarget);
15682 }
15683 
15684 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15685 ///
15686 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15687 /// can be used to apply the pattern.
15688 static std::optional<CombineResult>
15689 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15690                const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15691                const RISCVSubtarget &Subtarget) {
15692 
15693   if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15694     return std::nullopt;
15695   return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15696                        Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15697                        /*RHSExt=*/{ExtKind::ZExt});
15698 }
15699 
15700 SmallVector<NodeExtensionHelper::CombineToTry>
15701 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15702   SmallVector<CombineToTry> Strategies;
15703   switch (Root->getOpcode()) {
15704   case ISD::ADD:
15705   case ISD::SUB:
15706   case ISD::OR:
15707   case RISCVISD::ADD_VL:
15708   case RISCVISD::SUB_VL:
15709   case RISCVISD::FADD_VL:
15710   case RISCVISD::FSUB_VL:
15711     // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15712     Strategies.push_back(canFoldToVWWithSameExtension);
15713     // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15714     Strategies.push_back(canFoldToVW_W);
15715     break;
15716   case RISCVISD::FMUL_VL:
15717   case RISCVISD::VFMADD_VL:
15718   case RISCVISD::VFMSUB_VL:
15719   case RISCVISD::VFNMADD_VL:
15720   case RISCVISD::VFNMSUB_VL:
15721     Strategies.push_back(canFoldToVWWithSameExtension);
15722     break;
15723   case ISD::MUL:
15724   case RISCVISD::MUL_VL:
15725     // mul -> vwmul(u)
15726     Strategies.push_back(canFoldToVWWithSameExtension);
15727     // mul -> vwmulsu
15728     Strategies.push_back(canFoldToVW_SU);
15729     break;
15730   case ISD::SHL:
15731   case RISCVISD::SHL_VL:
15732     // shl -> vwsll
15733     Strategies.push_back(canFoldToVWWithZEXT);
15734     break;
15735   case RISCVISD::VWADD_W_VL:
15736   case RISCVISD::VWSUB_W_VL:
15737     // vwadd_w|vwsub_w -> vwadd|vwsub
15738     Strategies.push_back(canFoldToVWWithSEXT);
15739     break;
15740   case RISCVISD::VWADDU_W_VL:
15741   case RISCVISD::VWSUBU_W_VL:
15742     // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15743     Strategies.push_back(canFoldToVWWithZEXT);
15744     break;
15745   case RISCVISD::VFWADD_W_VL:
15746   case RISCVISD::VFWSUB_W_VL:
15747     // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15748     Strategies.push_back(canFoldToVWWithFPEXT);
15749     break;
15750   default:
15751     llvm_unreachable("Unexpected opcode");
15752   }
15753   return Strategies;
15754 }
15755 } // End anonymous namespace.
15756 
15757 /// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15758 /// The supported combines are:
15759 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15760 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15761 /// mul | mul_vl -> vwmul(u) | vwmul_su
15762 /// shl | shl_vl -> vwsll
15763 /// fadd_vl ->  vfwadd | vfwadd_w
15764 /// fsub_vl ->  vfwsub | vfwsub_w
15765 /// fmul_vl ->  vfwmul
15766 /// vwadd_w(u) -> vwadd(u)
15767 /// vwsub_w(u) -> vwsub(u)
15768 /// vfwadd_w -> vfwadd
15769 /// vfwsub_w -> vfwsub
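      /// For instance, (add_vl (vsext_vl a), (vsext_vl b)) can be rewritten as
      /// (vwadd_vl a, b), dropping both extends when they have no other users.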
15770 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15771                                      TargetLowering::DAGCombinerInfo &DCI,
15772                                      const RISCVSubtarget &Subtarget) {
15773   SelectionDAG &DAG = DCI.DAG;
15774   if (DCI.isBeforeLegalize())
15775     return SDValue();
15776 
15777   if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15778     return SDValue();
15779 
15780   SmallVector<SDNode *> Worklist;
15781   SmallSet<SDNode *, 8> Inserted;
15782   Worklist.push_back(N);
15783   Inserted.insert(N);
15784   SmallVector<CombineResult> CombinesToApply;
15785 
15786   while (!Worklist.empty()) {
15787     SDNode *Root = Worklist.pop_back_val();
15788 
15789     NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15790     NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15791     auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15792                                 &Inserted](const NodeExtensionHelper &Op) {
15793       if (Op.needToPromoteOtherUsers()) {
15794         for (SDUse &Use : Op.OrigOperand->uses()) {
15795           SDNode *TheUser = Use.getUser();
15796           if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15797             return false;
15798           // We only support the first 2 operands of FMA.
15799           if (Use.getOperandNo() >= 2)
15800             return false;
15801           if (Inserted.insert(TheUser).second)
15802             Worklist.push_back(TheUser);
15803         }
15804       }
15805       return true;
15806     };
15807 
15808     // Control the compile time by limiting the number of nodes we look at in
15809     // total.
15810     if (Inserted.size() > ExtensionMaxWebSize)
15811       return SDValue();
15812 
15813     SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15814         NodeExtensionHelper::getSupportedFoldings(Root);
15815 
15816     assert(!FoldingStrategies.empty() && "Nothing to be folded");
15817     bool Matched = false;
15818     for (int Attempt = 0;
15819          (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15820          ++Attempt) {
15821 
15822       for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15823            FoldingStrategies) {
15824         std::optional<CombineResult> Res =
15825             FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15826         if (Res) {
15827           Matched = true;
15828           CombinesToApply.push_back(*Res);
15829           // All the inputs that are extended need to be folded, otherwise
15830           // we would be leaving the old input (since it may still be used),
15831           // and the new one.
15832           if (Res->LHSExt.has_value())
15833             if (!AppendUsersIfNeeded(LHS))
15834               return SDValue();
15835           if (Res->RHSExt.has_value())
15836             if (!AppendUsersIfNeeded(RHS))
15837               return SDValue();
15838           break;
15839         }
15840       }
15841       std::swap(LHS, RHS);
15842     }
15843     // Right now we do an all-or-nothing approach.
15844     if (!Matched)
15845       return SDValue();
15846   }
15847   // Store the value for the replacement of the input node separately.
15848   SDValue InputRootReplacement;
15849   // We do the RAUW after we materialize all the combines, because some replaced
15850   // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15851   // some of these nodes may appear in the NodeExtensionHelpers of some of the
15852   // yet-to-be-visited CombinesToApply roots.
15853   SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15854   ValuesToReplace.reserve(CombinesToApply.size());
15855   for (CombineResult Res : CombinesToApply) {
15856     SDValue NewValue = Res.materialize(DAG, Subtarget);
15857     if (!InputRootReplacement) {
15858       assert(Res.Root == N &&
15859              "First element is expected to be the current node");
15860       InputRootReplacement = NewValue;
15861     } else {
15862       ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15863     }
15864   }
15865   for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15866     DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15867     DCI.AddToWorklist(OldNewValues.second.getNode());
15868   }
15869   return InputRootReplacement;
15870 }
15871 
15872 // Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15873 //      (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15874 // y will be the Passthru and cond will be the Mask.
15875 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15876   unsigned Opc = N->getOpcode();
15877   assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15878          Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15879 
15880   SDValue Y = N->getOperand(0);
15881   SDValue MergeOp = N->getOperand(1);
15882   unsigned MergeOpc = MergeOp.getOpcode();
15883 
15884   if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15885     return SDValue();
15886 
15887   SDValue X = MergeOp->getOperand(1);
15888 
15889   if (!MergeOp.hasOneUse())
15890     return SDValue();
15891 
15892   // Passthru should be undef
15893   SDValue Passthru = N->getOperand(2);
15894   if (!Passthru.isUndef())
15895     return SDValue();
15896 
15897   // Mask should be all ones
15898   SDValue Mask = N->getOperand(3);
15899   if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15900     return SDValue();
15901 
15902   // False value of MergeOp should be all zeros
15903   SDValue Z = MergeOp->getOperand(2);
15904 
15905   if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15906       (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15907     Z = Z.getOperand(1);
15908 
15909   if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15910     return SDValue();
15911 
15912   return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15913                      {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15914                      N->getFlags());
15915 }
15916 
15917 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
15918                                           TargetLowering::DAGCombinerInfo &DCI,
15919                                           const RISCVSubtarget &Subtarget) {
15920   [[maybe_unused]] unsigned Opc = N->getOpcode();
15921   assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15922          Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15923 
15924   if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15925     return V;
15926 
15927   return combineVWADDSUBWSelect(N, DCI.DAG);
15928 }
15929 
15930 // Helper function for performMemPairCombine.
15931 // Try to combine the memory loads/stores LSNode1 and LSNode2
15932 // into a single memory pair operation.
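      // For example, two i64 loads from Base+0 and Base+8 can be merged into a
      // single TH_LDD node that produces both values and a chain.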
15933 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15934                                  LSBaseSDNode *LSNode2, SDValue BasePtr,
15935                                  uint64_t Imm) {
15936   SmallPtrSet<const SDNode *, 32> Visited;
15937   SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15938 
15939   if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15940       SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15941     return SDValue();
15942 
15943   MachineFunction &MF = DAG.getMachineFunction();
15944   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15945 
15946   // The new operation has twice the width.
15947   MVT XLenVT = Subtarget.getXLenVT();
15948   EVT MemVT = LSNode1->getMemoryVT();
15949   EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15950   MachineMemOperand *MMO = LSNode1->getMemOperand();
15951   MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15952       MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15953 
15954   if (LSNode1->getOpcode() == ISD::LOAD) {
15955     auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15956     unsigned Opcode;
15957     if (MemVT == MVT::i32)
15958       Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15959     else
15960       Opcode = RISCVISD::TH_LDD;
15961 
15962     SDValue Res = DAG.getMemIntrinsicNode(
15963         Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15964         {LSNode1->getChain(), BasePtr,
15965          DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15966         NewMemVT, NewMMO);
15967 
15968     SDValue Node1 =
15969         DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15970     SDValue Node2 =
15971         DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15972 
15973     DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15974     return Node1;
15975   } else {
15976     unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15977 
15978     SDValue Res = DAG.getMemIntrinsicNode(
15979         Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15980         {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15981          BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15982         NewMemVT, NewMMO);
15983 
15984     DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15985     return Res;
15986   }
15987 }
15988 
15989 // Try to combine two adjacent loads/stores to a single pair instruction from
15990 // the XTHeadMemPair vendor extension.
15991 static SDValue performMemPairCombine(SDNode *N,
15992                                      TargetLowering::DAGCombinerInfo &DCI) {
15993   SelectionDAG &DAG = DCI.DAG;
15994   MachineFunction &MF = DAG.getMachineFunction();
15995   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15996 
15997   // Target does not support load/store pair.
15998   if (!Subtarget.hasVendorXTHeadMemPair())
15999     return SDValue();
16000 
16001   LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
16002   EVT MemVT = LSNode1->getMemoryVT();
16003   unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
16004 
16005   // No volatile, indexed or atomic loads/stores.
16006   if (!LSNode1->isSimple() || LSNode1->isIndexed())
16007     return SDValue();
16008 
16009   // Function to get a base + constant representation from a memory value.
16010   auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
16011     if (Ptr->getOpcode() == ISD::ADD)
16012       if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
16013         return {Ptr->getOperand(0), C1->getZExtValue()};
16014     return {Ptr, 0};
16015   };
16016 
16017   auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
16018 
16019   SDValue Chain = N->getOperand(0);
16020   for (SDUse &Use : Chain->uses()) {
16021     if (Use.getUser() != N && Use.getResNo() == 0 &&
16022         Use.getUser()->getOpcode() == N->getOpcode()) {
16023       LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16024 
16025       // No volatile, indexed or atomic loads/stores.
16026       if (!LSNode2->isSimple() || LSNode2->isIndexed())
16027         continue;
16028 
16029       // Check if LSNode1 and LSNode2 have the same type and extension.
16030       if (LSNode1->getOpcode() == ISD::LOAD)
16031         if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16032             cast<LoadSDNode>(LSNode1)->getExtensionType())
16033           continue;
16034 
16035       if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16036         continue;
16037 
16038       auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16039 
16040       // Check if the base pointer is the same for both instructions.
16041       if (Base1 != Base2)
16042         continue;
16043 
16044       // Check if the offsets match the XTHeadMemPair encoding constraints.
16045       bool Valid = false;
16046       if (MemVT == MVT::i32) {
16047         // Check for adjacent i32 values and a 2-bit index.
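              // isShiftedUInt<2, 3> restricts Offset1 to {0, 8, 16, 24}.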
16048         if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16049           Valid = true;
16050       } else if (MemVT == MVT::i64) {
16051         // Check for adjacent i64 values and a 2-bit index.
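              // isShiftedUInt<2, 4> restricts Offset1 to {0, 16, 32, 48}.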
16052         if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16053           Valid = true;
16054       }
16055 
16056       if (!Valid)
16057         continue;
16058 
16059       // Try to combine.
16060       if (SDValue Res =
16061               tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16062         return Res;
16063     }
16064   }
16065 
16066   return SDValue();
16067 }
16068 
16069 // Fold
16070 //   (fp_to_int (froundeven X)) -> fcvt X, rne
16071 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
16072 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
16073 //   (fp_to_int (fceil X))      -> fcvt X, rup
16074 //   (fp_to_int (fround X))     -> fcvt X, rmm
16075 //   (fp_to_int (frint X))      -> fcvt X
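      // E.g., on RV64 with D, (fp_to_sint (ffloor X:f64)) can be selected as a
      // single fcvt.l.d using the rdn static rounding mode.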
16076 static SDValue performFP_TO_INTCombine(SDNode *N,
16077                                        TargetLowering::DAGCombinerInfo &DCI,
16078                                        const RISCVSubtarget &Subtarget) {
16079   SelectionDAG &DAG = DCI.DAG;
16080   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16081   MVT XLenVT = Subtarget.getXLenVT();
16082 
16083   SDValue Src = N->getOperand(0);
16084 
16085   // Don't do this for strict-fp Src.
16086   if (Src->isStrictFPOpcode())
16087     return SDValue();
16088 
16089   // Ensure the FP type is legal.
16090   if (!TLI.isTypeLegal(Src.getValueType()))
16091     return SDValue();
16092 
16093   // Don't do this for f16 with Zfhmin and not Zfh.
16094   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16095     return SDValue();
16096 
16097   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16098   // If the result is invalid, we didn't find a foldable instruction.
16099   if (FRM == RISCVFPRndMode::Invalid)
16100     return SDValue();
16101 
16102   SDLoc DL(N);
16103   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16104   EVT VT = N->getValueType(0);
16105 
16106   if (VT.isVector() && TLI.isTypeLegal(VT)) {
16107     MVT SrcVT = Src.getSimpleValueType();
16108     MVT SrcContainerVT = SrcVT;
16109     MVT ContainerVT = VT.getSimpleVT();
16110     SDValue XVal = Src.getOperand(0);
16111 
16112     // For widening and narrowing conversions we just combine it into a
16113     // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16114     // end up getting lowered to their appropriate pseudo instructions based on
16115     // their operand types
16116     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16117         VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16118       return SDValue();
16119 
16120     // Make fixed-length vectors scalable first
16121     if (SrcVT.isFixedLengthVector()) {
16122       SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16123       XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16124       ContainerVT =
16125           getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16126     }
16127 
16128     auto [Mask, VL] =
16129         getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16130 
16131     SDValue FpToInt;
16132     if (FRM == RISCVFPRndMode::RTZ) {
16133       // Use the dedicated trunc static rounding mode if we're truncating so we
16134       // don't need to generate calls to fsrmi/fsrm
16135       unsigned Opc =
16136           IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16137       FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16138     } else {
16139       unsigned Opc =
16140           IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16141       FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16142                             DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16143     }
16144 
16145     // If converted from fixed-length to scalable, convert back
16146     if (VT.isFixedLengthVector())
16147       FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16148 
16149     return FpToInt;
16150   }
16151 
16152   // Only handle XLen or i32 types. Other types narrower than XLen will
16153   // eventually be legalized to XLenVT.
16154   if (VT != MVT::i32 && VT != XLenVT)
16155     return SDValue();
16156 
16157   unsigned Opc;
16158   if (VT == XLenVT)
16159     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16160   else
16161     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16162 
16163   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16164                                 DAG.getTargetConstant(FRM, DL, XLenVT));
16165   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16166 }
16167 
16168 // Fold
16169 //   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16170 //   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
16171 //   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
16172 //   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
16173 //   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
16174 //   (fp_to_int_sat (frint X))      -> (select X == nan, 0, (fcvt X, dyn))
16175 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
16176                                        TargetLowering::DAGCombinerInfo &DCI,
16177                                        const RISCVSubtarget &Subtarget) {
16178   SelectionDAG &DAG = DCI.DAG;
16179   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16180   MVT XLenVT = Subtarget.getXLenVT();
16181 
16182   // Only handle XLen types. Other types narrower than XLen will eventually be
16183   // legalized to XLenVT.
16184   EVT DstVT = N->getValueType(0);
16185   if (DstVT != XLenVT)
16186     return SDValue();
16187 
16188   SDValue Src = N->getOperand(0);
16189 
16190   // Don't do this for strict-fp Src.
16191   if (Src->isStrictFPOpcode())
16192     return SDValue();
16193 
16194   // Ensure the FP type is also legal.
16195   if (!TLI.isTypeLegal(Src.getValueType()))
16196     return SDValue();
16197 
16198   // Don't do this for f16 with Zfhmin and not Zfh.
16199   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16200     return SDValue();
16201 
16202   EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16203 
16204   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16205   if (FRM == RISCVFPRndMode::Invalid)
16206     return SDValue();
16207 
16208   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16209 
16210   unsigned Opc;
16211   if (SatVT == DstVT)
16212     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16213   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16214     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16215   else
16216     return SDValue();
16217   // FIXME: Support other SatVTs by clamping before or after the conversion.
16218 
16219   Src = Src.getOperand(0);
16220 
16221   SDLoc DL(N);
16222   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16223                                 DAG.getTargetConstant(FRM, DL, XLenVT));
16224 
16225   // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16226   // extend.
16227   if (Opc == RISCVISD::FCVT_WU_RV64)
16228     FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16229 
16230   // RISC-V FP-to-int conversions saturate to the destination register size, but
16231   // don't produce 0 for nan.
16232   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16233   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16234 }
16235 
16236 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16237 // smaller than XLenVT.
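      // A full bit reversal composed with a byte swap keeps the byte order and
      // only reverses the bits within each byte, which is exactly what BREV8
      // computes.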
16238 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
16239                                         const RISCVSubtarget &Subtarget) {
16240   assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16241 
16242   SDValue Src = N->getOperand(0);
16243   if (Src.getOpcode() != ISD::BSWAP)
16244     return SDValue();
16245 
16246   EVT VT = N->getValueType(0);
16247   if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16248       !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16249     return SDValue();
16250 
16251   SDLoc DL(N);
16252   return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16253 }
16254 
16255 static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
16256                                         const RISCVSubtarget &Subtarget) {
16257   // Fold:
16258   //    vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
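        // For example, reversing a vp.load of i32 elements with EVL=4 becomes a
        // strided load from ADDR+12 with stride -4, reading elements 3,2,1,0.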
16259 
16260   // Check if its first operand is a vp.load.
16261   auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
16262   if (!VPLoad)
16263     return SDValue();
16264 
16265   EVT LoadVT = VPLoad->getValueType(0);
16266   // We do not have a strided_load version for masks, and the evl of vp.reverse
16267   // and vp.load should always be the same.
16268   if (!LoadVT.getVectorElementType().isByteSized() ||
16269       N->getOperand(2) != VPLoad->getVectorLength() ||
16270       !N->getOperand(0).hasOneUse())
16271     return SDValue();
16272 
16273   // Check that the mask of the outer vp.reverse is all ones.
16274   if (!isOneOrOneSplat(N->getOperand(1)))
16275     return SDValue();
16276 
16277   SDValue LoadMask = VPLoad->getMask();
16278   // If the mask is all ones, then the load is unmasked and can be reversed.
16279   if (!isOneOrOneSplat(LoadMask)) {
16280     // If the mask is not all ones, we can reverse the load if the mask was also
16281     // reversed by an unmasked vp.reverse with the same EVL.
16282     if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16283         !isOneOrOneSplat(LoadMask.getOperand(1)) ||
16284         LoadMask.getOperand(2) != VPLoad->getVectorLength())
16285       return SDValue();
16286     LoadMask = LoadMask.getOperand(0);
16287   }
16288 
16289   // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
16290   SDLoc DL(N);
16291   MVT XLenVT = Subtarget.getXLenVT();
16292   SDValue NumElem = VPLoad->getVectorLength();
16293   uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16294 
16295   SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16296                               DAG.getConstant(1, DL, XLenVT));
16297   SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16298                               DAG.getConstant(ElemWidthByte, DL, XLenVT));
16299   SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
16300   SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16301 
16302   MachineFunction &MF = DAG.getMachineFunction();
16303   MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
16304   MachineMemOperand *MMO = MF.getMachineMemOperand(
16305       PtrInfo, VPLoad->getMemOperand()->getFlags(),
16306       LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
16307 
16308   SDValue Ret = DAG.getStridedLoadVP(
16309       LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
16310       VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16311 
16312   DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
16313 
16314   return Ret;
16315 }
16316 
16317 static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
16318                                       const RISCVSubtarget &Subtarget) {
16319   // Fold:
16320   //    vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
16321   //    -1, MASK)
16322   auto *VPStore = cast<VPStoreSDNode>(N);
16323 
16324   if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16325     return SDValue();
16326 
16327   SDValue VPReverse = VPStore->getValue();
16328   EVT ReverseVT = VPReverse->getValueType(0);
16329 
16330   // We do not have a strided_store version for masks, and the evl of vp.reverse
16331   // and vp.store should always be the same.
16332   if (!ReverseVT.getVectorElementType().isByteSized() ||
16333       VPStore->getVectorLength() != VPReverse.getOperand(2) ||
16334       !VPReverse.hasOneUse())
16335     return SDValue();
16336 
16337   SDValue StoreMask = VPStore->getMask();
16338   // If the mask is all ones, then the store is unmasked and can be reversed.
16339   if (!isOneOrOneSplat(StoreMask)) {
16340     // If the mask is not all ones, we can reverse the store if the mask was
16341     // also reversed by an unmasked vp.reverse with the same EVL.
16342     if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16343         !isOneOrOneSplat(StoreMask.getOperand(1)) ||
16344         StoreMask.getOperand(2) != VPStore->getVectorLength())
16345       return SDValue();
16346     StoreMask = StoreMask.getOperand(0);
16347   }
16348 
16349   // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
16350   SDLoc DL(N);
16351   MVT XLenVT = Subtarget.getXLenVT();
16352   SDValue NumElem = VPStore->getVectorLength();
16353   uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
16354 
16355   SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16356                               DAG.getConstant(1, DL, XLenVT));
16357   SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16358                               DAG.getConstant(ElemWidthByte, DL, XLenVT));
16359   SDValue Base =
16360       DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
16361   SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16362 
16363   MachineFunction &MF = DAG.getMachineFunction();
16364   MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
16365   MachineMemOperand *MMO = MF.getMachineMemOperand(
16366       PtrInfo, VPStore->getMemOperand()->getFlags(),
16367       LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
16368 
16369   return DAG.getStridedStoreVP(
16370       VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
16371       VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16372       VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16373       VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16374 }
16375 
16376 // Convert from one FMA opcode to another based on whether we are negating the
16377 // multiply result and/or the accumulator.
16378 // NOTE: Only supports RVV operations with VL.
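      // For example, VFMADD_VL computes (a * b) + c. Negating the multiply
      // result gives -(a * b) + c, i.e. VFNMSUB_VL; additionally negating the
      // accumulator gives -(a * b) - c, i.e. VFNMADD_VL.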
16379 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16380   // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16381   if (NegMul) {
16382     // clang-format off
16383     switch (Opcode) {
16384     default: llvm_unreachable("Unexpected opcode");
16385     case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFNMSUB_VL; break;
16386     case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL;  break;
16387     case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL;  break;
16388     case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFNMADD_VL; break;
16389     case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16390     case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
16391     case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
16392     case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16393     }
16394     // clang-format on
16395   }
16396 
16397   // Negating the accumulator changes ADD<->SUB.
16398   if (NegAcc) {
16399     // clang-format off
16400     switch (Opcode) {
16401     default: llvm_unreachable("Unexpected opcode");
16402     case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFMSUB_VL;  break;
16403     case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFMADD_VL;  break;
16404     case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16405     case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16406     case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
16407     case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
16408     case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16409     case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16410     }
16411     // clang-format on
16412   }
16413 
16414   return Opcode;
16415 }
16416 
16417 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
16418   // Fold FNEG_VL into FMA opcodes.
16419   // The first operand of strict-fp is chain.
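        // For example, (vfmadd_vl (fneg_vl a, mask, vl), b, c, mask, vl) becomes
        // (vfnmsub_vl a, b, c, mask, vl), provided the fneg_vl uses the same
        // mask and VL as the FMA node.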
16420   bool IsStrict =
16421       DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16422   unsigned Offset = IsStrict ? 1 : 0;
16423   SDValue A = N->getOperand(0 + Offset);
16424   SDValue B = N->getOperand(1 + Offset);
16425   SDValue C = N->getOperand(2 + Offset);
16426   SDValue Mask = N->getOperand(3 + Offset);
16427   SDValue VL = N->getOperand(4 + Offset);
16428 
16429   auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16430     if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16431         V.getOperand(2) == VL) {
16432       // Return the negated input.
16433       V = V.getOperand(0);
16434       return true;
16435     }
16436 
16437     return false;
16438   };
16439 
16440   bool NegA = invertIfNegative(A);
16441   bool NegB = invertIfNegative(B);
16442   bool NegC = invertIfNegative(C);
16443 
16444   // If no operands are negated, we're done.
16445   if (!NegA && !NegB && !NegC)
16446     return SDValue();
16447 
16448   unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16449   if (IsStrict)
16450     return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16451                        {N->getOperand(0), A, B, C, Mask, VL});
16452   return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16453                      VL);
16454 }
16455 
16456 static SDValue performVFMADD_VLCombine(SDNode *N,
16457                                        TargetLowering::DAGCombinerInfo &DCI,
16458                                        const RISCVSubtarget &Subtarget) {
16459   SelectionDAG &DAG = DCI.DAG;
16460 
16461   if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
16462     return V;
16463 
16464   // FIXME: Ignore strict opcodes for now.
16465   if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16466     return SDValue();
16467 
16468   return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16469 }
16470 
16471 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
16472                                  const RISCVSubtarget &Subtarget) {
16473   assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16474 
16475   EVT VT = N->getValueType(0);
16476 
16477   if (VT != Subtarget.getXLenVT())
16478     return SDValue();
16479 
16480   if (!isa<ConstantSDNode>(N->getOperand(1)))
16481     return SDValue();
16482   uint64_t ShAmt = N->getConstantOperandVal(1);
16483 
16484   SDValue N0 = N->getOperand(0);
16485 
16486   // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16487   // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
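        // For example, with XLen=64, iX=32, C1=2 and C2=3 this rewrites
        // (sra (sext_inreg (shl X, 2), i32), 3) to (sra (shl X, 34), 35).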
16488   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16489     unsigned ExtSize =
16490         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16491     if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16492         N0.getOperand(0).hasOneUse() &&
16493         isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16494       uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16495       if (LShAmt < ExtSize) {
16496         unsigned Size = VT.getSizeInBits();
16497         SDLoc ShlDL(N0.getOperand(0));
16498         SDValue Shl =
16499             DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16500                         DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16501         SDLoc DL(N);
16502         return DAG.getNode(ISD::SRA, DL, VT, Shl,
16503                            DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16504       }
16505     }
16506   }
16507 
16508   if (ShAmt > 32 || VT != MVT::i64)
16509     return SDValue();
16510 
16511   // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16512   // FIXME: Should this be a generic combine? There's a similar combine on X86.
16513   //
16514   // Also try these folds where an add or sub is in the middle.
16515   // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
16516   // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
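        // For example, with C=5 this rewrites (sra (shl X, 32), 27) to
        // (shl (sext_inreg X, i32), 5).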
16517   SDValue Shl;
16518   ConstantSDNode *AddC = nullptr;
16519 
16520   // We might have an ADD or SUB between the SRA and SHL.
16521   bool IsAdd = N0.getOpcode() == ISD::ADD;
16522   if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16523     // Other operand needs to be a constant we can modify.
16524     AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16525     if (!AddC)
16526       return SDValue();
16527 
16528     // AddC needs to have at least 32 trailing zeros.
16529     if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16530       return SDValue();
16531 
16532     // All users should be shifts by a constant less than or equal to 32. This
16533     // ensures we'll do this optimization for each of them to produce an
16534     // add/sub+sext_inreg they can all share.
16535     for (SDNode *U : N0->users()) {
16536       if (U->getOpcode() != ISD::SRA ||
16537           !isa<ConstantSDNode>(U->getOperand(1)) ||
16538           U->getConstantOperandVal(1) > 32)
16539         return SDValue();
16540     }
16541 
16542     Shl = N0.getOperand(IsAdd ? 0 : 1);
16543   } else {
16544     // Not an ADD or SUB.
16545     Shl = N0;
16546   }
16547 
16548   // Look for a shift left by 32.
16549   if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16550       Shl.getConstantOperandVal(1) != 32)
16551     return SDValue();
16552 
16553   // If we didn't look through an add/sub, then the shl should have one use.
16554   // If we did look through an add/sub, the sext_inreg we create is free so
16555   // we're only creating 2 new instructions. It's enough to only remove the
16556   // original sra+add/sub.
16557   if (!AddC && !Shl.hasOneUse())
16558     return SDValue();
16559 
16560   SDLoc DL(N);
16561   SDValue In = Shl.getOperand(0);
16562 
16563   // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16564   // constant.
16565   if (AddC) {
16566     SDValue ShiftedAddC =
16567         DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16568     if (IsAdd)
16569       In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16570     else
16571       In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16572   }
16573 
16574   SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16575                              DAG.getValueType(MVT::i32));
16576   if (ShAmt == 32)
16577     return SExt;
16578 
16579   return DAG.getNode(
16580       ISD::SHL, DL, MVT::i64, SExt,
16581       DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16582 }
16583 
16584 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
16585 // the result is used as the condition of a br_cc or select_cc we can invert,
16586 // inverting the setcc is free, and Z is 0/1. The caller will invert the
16587 // br_cc/select_cc.
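      // For example, (and (setcc X, Y, eq), (xor Z, 1)) becomes
      // (or (setcc X, Y, ne), Z); inverting the consuming br_cc/select_cc then
      // restores the original condition.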
16588 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16589   bool IsAnd = Cond.getOpcode() == ISD::AND;
16590   if (!IsAnd && Cond.getOpcode() != ISD::OR)
16591     return SDValue();
16592 
16593   if (!Cond.hasOneUse())
16594     return SDValue();
16595 
16596   SDValue Setcc = Cond.getOperand(0);
16597   SDValue Xor = Cond.getOperand(1);
16598   // Canonicalize setcc to LHS.
16599   if (Setcc.getOpcode() != ISD::SETCC)
16600     std::swap(Setcc, Xor);
16601   // LHS should be a setcc and RHS should be an xor.
16602   if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16603       Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16604     return SDValue();
16605 
16606   // If the condition is an And, SimplifyDemandedBits may have changed
16607   // (xor Z, 1) to (not Z).
16608   SDValue Xor1 = Xor.getOperand(1);
16609   if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16610     return SDValue();
16611 
16612   EVT VT = Cond.getValueType();
16613   SDValue Xor0 = Xor.getOperand(0);
16614 
16615   // The LHS of the xor needs to be 0/1.
16616   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16617   if (!DAG.MaskedValueIsZero(Xor0, Mask))
16618     return SDValue();
16619 
16620   // We can only invert integer setccs.
16621   EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16622   if (!SetCCOpVT.isScalarInteger())
16623     return SDValue();
16624 
16625   ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16626   if (ISD::isIntEqualitySetCC(CCVal)) {
16627     CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16628     Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16629                          Setcc.getOperand(1), CCVal);
16630   } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16631     // Invert (setlt 0, X) by converting to (setlt X, 1).
16632     Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16633                          DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16634   } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16635     // Invert (setlt X, 1) by converting to (setlt 0, X).
16636     Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16637                          DAG.getConstant(0, SDLoc(Setcc), VT),
16638                          Setcc.getOperand(0), CCVal);
16639   } else
16640     return SDValue();
16641 
16642   unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16643   return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16644 }
16645 
16646 // Perform common combines for BR_CC and SELECT_CC conditions.
16647 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16648                        SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16649   ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16650 
16651   // Since an arithmetic right shift always preserves the sign bit, the
16652   // shift can be omitted.
16653   // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16654   // setge (sra X, N), 0 -> setge X, 0
16655   if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16656       LHS.getOpcode() == ISD::SRA) {
16657     LHS = LHS.getOperand(0);
16658     return true;
16659   }
16660 
16661   if (!ISD::isIntEqualitySetCC(CCVal))
16662     return false;
16663 
16664   // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16665   // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16666   if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16667       LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16668     // If we're looking for eq 0 instead of ne 0, we need to invert the
16669     // condition.
16670     bool Invert = CCVal == ISD::SETEQ;
16671     CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16672     if (Invert)
16673       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16674 
16675     RHS = LHS.getOperand(1);
16676     LHS = LHS.getOperand(0);
16677     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16678 
16679     CC = DAG.getCondCode(CCVal);
16680     return true;
16681   }
16682 
16683   // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16684   if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16685     RHS = LHS.getOperand(1);
16686     LHS = LHS.getOperand(0);
16687     return true;
16688   }
16689 
16690   // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
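        // For example, on RV64 with C=3, ((srl (and X, 8), 3), 0, ne) becomes
        // ((shl X, 60), 0, lt): the shift moves bit 3 of X into the sign bit.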
16691   if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16692       LHS.getOperand(1).getOpcode() == ISD::Constant) {
16693     SDValue LHS0 = LHS.getOperand(0);
16694     if (LHS0.getOpcode() == ISD::AND &&
16695         LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16696       uint64_t Mask = LHS0.getConstantOperandVal(1);
16697       uint64_t ShAmt = LHS.getConstantOperandVal(1);
16698       if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16699         CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16700         CC = DAG.getCondCode(CCVal);
16701 
16702         ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16703         LHS = LHS0.getOperand(0);
16704         if (ShAmt != 0)
16705           LHS =
16706               DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16707                           DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16708         return true;
16709       }
16710     }
16711   }
16712 
16713   // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16714   // This can occur when legalizing some floating point comparisons.
16715   APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16716   if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16717     CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16718     CC = DAG.getCondCode(CCVal);
16719     RHS = DAG.getConstant(0, DL, LHS.getValueType());
16720     return true;
16721   }
16722 
16723   if (isNullConstant(RHS)) {
16724     if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16725       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16726       CC = DAG.getCondCode(CCVal);
16727       LHS = NewCond;
16728       return true;
16729     }
16730   }
16731 
16732   return false;
16733 }
16734 
16735 // Fold
16736 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16737 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16738 // (select C, (or Y, X), Y)  -> (or Y, (select C, X, 0)).
16739 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
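      // For example, (select C, (add Y, X), Y) becomes (add Y, (select C, X, 0)),
      // so only the non-shared operand needs to be selected against the identity
      // value of the operation.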
16740 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16741                                    SDValue TrueVal, SDValue FalseVal,
16742                                    bool Swapped) {
16743   bool Commutative = true;
16744   unsigned Opc = TrueVal.getOpcode();
16745   switch (Opc) {
16746   default:
16747     return SDValue();
16748   case ISD::SHL:
16749   case ISD::SRA:
16750   case ISD::SRL:
16751   case ISD::SUB:
16752     Commutative = false;
16753     break;
16754   case ISD::ADD:
16755   case ISD::OR:
16756   case ISD::XOR:
16757     break;
16758   }
16759 
16760   if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16761     return SDValue();
16762 
16763   unsigned OpToFold;
16764   if (FalseVal == TrueVal.getOperand(0))
16765     OpToFold = 0;
16766   else if (Commutative && FalseVal == TrueVal.getOperand(1))
16767     OpToFold = 1;
16768   else
16769     return SDValue();
16770 
16771   EVT VT = N->getValueType(0);
16772   SDLoc DL(N);
16773   SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16774   EVT OtherOpVT = OtherOp.getValueType();
16775   SDValue IdentityOperand =
16776       DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16777   if (!Commutative)
16778     IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16779   assert(IdentityOperand && "No identity operand!");
16780 
16781   if (Swapped)
16782     std::swap(OtherOp, IdentityOperand);
16783   SDValue NewSel =
16784       DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16785   return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16786 }
16787 
16788 // This tries to get rid of the `select` and `icmp` that are being used to
16789 // handle targets that do not support `cttz(0)`/`ctlz(0)`.
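      // For example, (select (seteq X, 0), 0, (cttz X)) becomes
      // (and (cttz X), BitWidth - 1): cttz of zero yields BitWidth, and masking
      // with BitWidth - 1 folds that case to the expected 0.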
16790 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16791   SDValue Cond = N->getOperand(0);
16792 
16793   // This represents either CTTZ or CTLZ instruction.
16794   SDValue CountZeroes;
16795 
16796   SDValue ValOnZero;
16797 
16798   if (Cond.getOpcode() != ISD::SETCC)
16799     return SDValue();
16800 
16801   if (!isNullConstant(Cond->getOperand(1)))
16802     return SDValue();
16803 
16804   ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16805   if (CCVal == ISD::CondCode::SETEQ) {
16806     CountZeroes = N->getOperand(2);
16807     ValOnZero = N->getOperand(1);
16808   } else if (CCVal == ISD::CondCode::SETNE) {
16809     CountZeroes = N->getOperand(1);
16810     ValOnZero = N->getOperand(2);
16811   } else {
16812     return SDValue();
16813   }
16814 
16815   if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16816       CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16817     CountZeroes = CountZeroes.getOperand(0);
16818 
16819   if (CountZeroes.getOpcode() != ISD::CTTZ &&
16820       CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16821       CountZeroes.getOpcode() != ISD::CTLZ &&
16822       CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16823     return SDValue();
16824 
16825   if (!isNullConstant(ValOnZero))
16826     return SDValue();
16827 
16828   SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16829   if (Cond->getOperand(0) != CountZeroesArgument)
16830     return SDValue();
16831 
16832   if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16833     CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16834                               CountZeroes.getValueType(), CountZeroesArgument);
16835   } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16836     CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16837                               CountZeroes.getValueType(), CountZeroesArgument);
16838   }
16839 
16840   unsigned BitWidth = CountZeroes.getValueSizeInBits();
16841   SDValue BitWidthMinusOne =
16842       DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16843 
16844   auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16845                              CountZeroes, BitWidthMinusOne);
16846   return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16847 }
16848 
16849 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16850                                 const RISCVSubtarget &Subtarget) {
16851   SDValue Cond = N->getOperand(0);
16852   SDValue True = N->getOperand(1);
16853   SDValue False = N->getOperand(2);
16854   SDLoc DL(N);
16855   EVT VT = N->getValueType(0);
16856   EVT CondVT = Cond.getValueType();
16857 
16858   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16859     return SDValue();
16860 
16861   // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
16862   // BEXTI, where C is a power of 2.
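        // For example, (select (seteq (and x, 0x1000), 0), T, F) becomes
        // (select (setne (and x, 0x1000), 0), F, T); the ne form of the
        // single-bit test maps onto BEXTI, and swapping the select arms absorbs
        // the inversion.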
16863   if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16864       (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16865     SDValue LHS = Cond.getOperand(0);
16866     SDValue RHS = Cond.getOperand(1);
16867     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16868     if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16869         isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16870       const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16871       if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16872         return DAG.getSelect(DL, VT,
16873                              DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16874                              False, True);
16875     }
16876   }
16877   return SDValue();
16878 }
16879 
16880 static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
16881   if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
16882     return false;
16883 
16884   SwapCC = false;
16885   if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
16886     std::swap(TrueVal, FalseVal);
16887     SwapCC = true;
16888   }
16889 
16890   if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
16891     return false;
16892 
16893   SDValue A = FalseVal.getOperand(0);
16894   SDValue B = FalseVal.getOperand(1);
16895   // Add is commutative, so check both orders
16896   return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
16897           (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
16898 }
16899 
16900 /// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
16901 /// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
16902 /// register pressure over the add followed by masked vsub sequence.
16903 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
16904   SDLoc DL(N);
16905   EVT VT = N->getValueType(0);
16906   SDValue CC = N->getOperand(0);
16907   SDValue TrueVal = N->getOperand(1);
16908   SDValue FalseVal = N->getOperand(2);
16909 
16910   bool SwapCC;
16911   if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
16912     return SDValue();
16913 
16914   SDValue Sub = SwapCC ? TrueVal : FalseVal;
16915   SDValue A = Sub.getOperand(0);
16916   SDValue B = Sub.getOperand(1);
16917 
16918   // Arrange the select such that we can match a masked
16919   // vrsub.vi to perform the conditional negate
16920   SDValue NegB = DAG.getNegative(B, DL, VT);
16921   if (!SwapCC)
16922     CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
16923   SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
16924   return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
16925 }
16926 
16927 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
16928                                     const RISCVSubtarget &Subtarget) {
16929   if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16930     return Folded;
16931 
16932   if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16933     return V;
16934 
16935   if (Subtarget.hasConditionalMoveFusion())
16936     return SDValue();
16937 
16938   SDValue TrueVal = N->getOperand(1);
16939   SDValue FalseVal = N->getOperand(2);
16940   if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16941     return V;
16942   return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16943 }
16944 
16945 /// If we have a build_vector where each lane is binop X, C, where C
16946 /// is a constant (but not necessarily the same constant on all lanes),
16947 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16948 /// We assume that materializing a constant build vector will be no more
16949 /// expensive than performing O(n) binops.
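      /// For example, (build_vector (add x0, 1), (add x1, 2)) becomes
      /// (add (build_vector x0, x1), (build_vector 1, 2)).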
16950 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
16951                                           const RISCVSubtarget &Subtarget,
16952                                           const RISCVTargetLowering &TLI) {
16953   SDLoc DL(N);
16954   EVT VT = N->getValueType(0);
16955 
16956   assert(!VT.isScalableVector() && "unexpected build vector");
16957 
16958   if (VT.getVectorNumElements() == 1)
16959     return SDValue();
16960 
16961   const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16962   if (!TLI.isBinOp(Opcode))
16963     return SDValue();
16964 
16965   if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16966     return SDValue();
16967 
16968   // This BUILD_VECTOR involves an implicit truncation, and sinking
16969   // truncates through binops is non-trivial.
16970   if (N->op_begin()->getValueType() != VT.getVectorElementType())
16971     return SDValue();
16972 
16973   SmallVector<SDValue> LHSOps;
16974   SmallVector<SDValue> RHSOps;
16975   for (SDValue Op : N->ops()) {
16976     if (Op.isUndef()) {
16977       // We can't form a divide or remainder from undef.
16978       if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16979         return SDValue();
16980 
16981       LHSOps.push_back(Op);
16982       RHSOps.push_back(Op);
16983       continue;
16984     }
16985 
16986     // TODO: We can handle operations which have a neutral rhs value
16987     // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16988     // of profit in a more explicit manner.
16989     if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16990       return SDValue();
16991 
16992     LHSOps.push_back(Op.getOperand(0));
16993     if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16994         !isa<ConstantFPSDNode>(Op.getOperand(1)))
16995       return SDValue();
16996     // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16997     // have different LHS and RHS types.
16998     if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16999       return SDValue();
17000 
17001     RHSOps.push_back(Op.getOperand(1));
17002   }
17003 
17004   return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
17005                      DAG.getBuildVector(VT, DL, RHSOps));
17006 }
17007 
17008 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
17009                                                const RISCVSubtarget &Subtarget,
17010                                                const RISCVTargetLowering &TLI) {
17011   SDValue InVec = N->getOperand(0);
17012   SDValue InVal = N->getOperand(1);
17013   SDValue EltNo = N->getOperand(2);
17014   SDLoc DL(N);
17015 
17016   EVT VT = InVec.getValueType();
17017   if (VT.isScalableVector())
17018     return SDValue();
17019 
17020   if (!InVec.hasOneUse())
17021     return SDValue();
17022 
17023   // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
17024   // move the insert_vector_elts into the arms of the binop.  Note that
17025   // the new RHS must be a constant.
17026   const unsigned InVecOpcode = InVec->getOpcode();
17027   if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
17028       InVal.hasOneUse()) {
17029     SDValue InVecLHS = InVec->getOperand(0);
17030     SDValue InVecRHS = InVec->getOperand(1);
17031     SDValue InValLHS = InVal->getOperand(0);
17032     SDValue InValRHS = InVal->getOperand(1);
17033 
17034     if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))
17035       return SDValue();
17036     if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
17037       return SDValue();
17038     // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
17039     // have different LHS and RHS types.
17040     if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
17041       return SDValue();
17042     SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17043                               InVecLHS, InValLHS, EltNo);
17044     SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17045                               InVecRHS, InValRHS, EltNo);
17046     return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
17047   }
17048 
17049   // Given insert_vector_elt (concat_vectors ...), InVal, Elt
17050   // move the insert_vector_elt to the source operand of the concat_vector.
17051   if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
17052     return SDValue();
17053 
17054   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17055   if (!IndexC)
17056     return SDValue();
17057   unsigned Elt = IndexC->getZExtValue();
17058 
17059   EVT ConcatVT = InVec.getOperand(0).getValueType();
17060   if (ConcatVT.getVectorElementType() != InVal.getValueType())
17061     return SDValue();
17062   unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17063   SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
17064 
17065   unsigned ConcatOpIdx = Elt / ConcatNumElts;
17066   SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
17067   ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
17068                          ConcatOp, InVal, NewIdx);
17069 
17070   SmallVector<SDValue> ConcatOps;
17071   ConcatOps.append(InVec->op_begin(), InVec->op_end());
17072   ConcatOps[ConcatOpIdx] = ConcatOp;
17073   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17074 }
17075 
17076 // If we're concatenating a series of vector loads like
17077 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
17078 // then we can turn this into a strided load by widening the vector elements:
17079 // vlse32 p, stride=n
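      // For example, four v4i8 loads at p, p+n, p+2n and p+3n are widened to a
      // single strided load of four i32 elements with stride n, and the result
      // is bitcast back to the concatenated v16i8 type.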
17080 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
17081                                             const RISCVSubtarget &Subtarget,
17082                                             const RISCVTargetLowering &TLI) {
17083   SDLoc DL(N);
17084   EVT VT = N->getValueType(0);
17085 
17086   // Only perform this combine on legal MVTs.
17087   if (!TLI.isTypeLegal(VT))
17088     return SDValue();
17089 
17090   // TODO: Potentially extend this to scalable vectors
17091   if (VT.isScalableVector())
17092     return SDValue();
17093 
17094   auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
17095   if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
17096       !SDValue(BaseLd, 0).hasOneUse())
17097     return SDValue();
17098 
17099   EVT BaseLdVT = BaseLd->getValueType(0);
17100 
17101   // Go through the loads and check that they're strided
17102   SmallVector<LoadSDNode *> Lds;
17103   Lds.push_back(BaseLd);
17104   Align Align = BaseLd->getAlign();
17105   for (SDValue Op : N->ops().drop_front()) {
17106     auto *Ld = dyn_cast<LoadSDNode>(Op);
17107     if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
17108         Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
17109         Ld->getValueType(0) != BaseLdVT)
17110       return SDValue();
17111 
17112     Lds.push_back(Ld);
17113 
17114     // The common alignment is the most restrictive (smallest) of all the loads
17115     Align = std::min(Align, Ld->getAlign());
17116   }
17117 
17118   using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
17119   auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
17120                            LoadSDNode *Ld2) -> std::optional<PtrDiff> {
17121     // If the load ptrs can be decomposed into a common (Base + Index) with a
17122     // common constant stride, then return the constant stride.
17123     BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
17124     BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
17125     if (BIO1.equalBaseIndex(BIO2, DAG))
17126       return {{BIO2.getOffset() - BIO1.getOffset(), false}};
17127 
17128     // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
17129     SDValue P1 = Ld1->getBasePtr();
17130     SDValue P2 = Ld2->getBasePtr();
17131     if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
17132       return {{P2.getOperand(1), false}};
17133     if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
17134       return {{P1.getOperand(1), true}};
17135 
17136     return std::nullopt;
17137   };
17138 
17139   // Get the distance between the first and second loads
17140   auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17141   if (!BaseDiff)
17142     return SDValue();
17143 
17144   // Check all the loads are the same distance apart
17145   for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
17146     if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17147       return SDValue();
17148 
17149   // TODO: At this point, we've successfully matched a generalized gather
17150   // load.  Maybe we should emit that, and then move the specialized
17151   // matchers above and below into a DAG combine?
17152 
17153   // Get the widened scalar type, e.g. v4i8 -> i64
17154   unsigned WideScalarBitWidth =
17155       BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
17156   MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
17157 
17158   // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
17159   MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
17160   if (!TLI.isTypeLegal(WideVecVT))
17161     return SDValue();
17162 
17163   // Check that the operation is legal
17164   if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
17165     return SDValue();
17166 
17167   auto [StrideVariant, MustNegateStride] = *BaseDiff;
17168   SDValue Stride =
17169       std::holds_alternative<SDValue>(StrideVariant)
17170           ? std::get<SDValue>(StrideVariant)
17171           : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
17172                                   Lds[0]->getOffset().getValueType());
17173   if (MustNegateStride)
17174     Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
17175 
17176   SDValue AllOneMask =
17177     DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
17178                  DAG.getConstant(1, DL, MVT::i1));
17179 
17180   uint64_t MemSize;
17181   if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17182       ConstStride && ConstStride->getSExtValue() >= 0)
17183     // total size = (elsize * n) + (stride - elsize) * (n-1)
17184     //            = elsize + stride * (n-1)
17185     MemSize = WideScalarVT.getSizeInBits() +
17186               ConstStride->getSExtValue() * (N->getNumOperands() - 1);
17187   else
17188     // If Stride isn't constant, then we can't know how much it will load
17189     MemSize = MemoryLocation::UnknownSize;
17190 
17191   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
17192       BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17193       Align);
17194 
17195   SDValue StridedLoad = DAG.getStridedLoadVP(
17196       WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17197       AllOneMask,
17198       DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
17199 
17200   for (SDValue Ld : N->ops())
17201     DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17202 
17203   return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17204 }
17205 
17206 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
17207                                             const RISCVSubtarget &Subtarget,
17208                                             const RISCVTargetLowering &TLI) {
17209   SDLoc DL(N);
17210   EVT VT = N->getValueType(0);
17211   const unsigned ElementSize = VT.getScalarSizeInBits();
17212   const unsigned NumElts = VT.getVectorNumElements();
17213   SDValue V1 = N->getOperand(0);
17214   SDValue V2 = N->getOperand(1);
17215   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
17216   MVT XLenVT = Subtarget.getXLenVT();
17217 
17218   // Recognize a disguised select of add/sub.
17219   bool SwapCC;
17220   if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
17221       matchSelectAddSub(V1, V2, SwapCC)) {
17222     SDValue Sub = SwapCC ? V1 : V2;
17223     SDValue A = Sub.getOperand(0);
17224     SDValue B = Sub.getOperand(1);
17225 
17226     SmallVector<SDValue> MaskVals;
17227     for (int MaskIndex : Mask) {
17228       bool SelectMaskVal = (MaskIndex < (int)NumElts);
17229       MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
17230     }
17231     assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
17232     EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
17233     SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
17234 
17235     // Arrange the select such that we can match a masked
17236     // vrsub.vi to perform the conditional negate
17237     SDValue NegB = DAG.getNegative(B, DL, VT);
17238     if (!SwapCC)
17239       CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
17240     SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
17241     return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
17242   }
17243 
17244   // Custom legalize <N x i128> or <N x i256> to <M x ELEN>.  This runs
17245   // during the combine phase before type legalization, and relies on
17246   // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
17247   // for the source mask.
17248   if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
17249       !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
17250       VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
17251     return SDValue();
17252 
17253   SmallVector<int, 8> NewMask;
17254   narrowShuffleMaskElts(2, Mask, NewMask);
17255 
17256   LLVMContext &C = *DAG.getContext();
17257   EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
17258   EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
17259   SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
17260                                      DAG.getBitcast(NewVT, V2), NewMask);
17261   return DAG.getBitcast(VT, Res);
17262 }
17263 
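      // Combine (add (vwmul a, b), addend) or (add_vl (vwmul_vl a, b), addend)
      // into the corresponding VWMACC_VL/VWMACCU_VL/VWMACCSU_VL node when the
      // masks and VLs of the add and the multiply match and the passthru
      // operands are undef. Only scalable result types are handled here.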
17264 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
17265                                const RISCVSubtarget &Subtarget) {
17266 
17267   assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
17268 
17269   if (N->getValueType(0).isFixedLengthVector())
17270     return SDValue();
17271 
17272   SDValue Addend = N->getOperand(0);
17273   SDValue MulOp = N->getOperand(1);
17274 
17275   if (N->getOpcode() == RISCVISD::ADD_VL) {
17276     SDValue AddPassthruOp = N->getOperand(2);
17277     if (!AddPassthruOp.isUndef())
17278       return SDValue();
17279   }
17280 
17281   auto IsVWMulOpc = [](unsigned Opc) {
17282     switch (Opc) {
17283     case RISCVISD::VWMUL_VL:
17284     case RISCVISD::VWMULU_VL:
17285     case RISCVISD::VWMULSU_VL:
17286       return true;
17287     default:
17288       return false;
17289     }
17290   };
17291 
17292   if (!IsVWMulOpc(MulOp.getOpcode()))
17293     std::swap(Addend, MulOp);
17294 
17295   if (!IsVWMulOpc(MulOp.getOpcode()))
17296     return SDValue();
17297 
17298   SDValue MulPassthruOp = MulOp.getOperand(2);
17299 
17300   if (!MulPassthruOp.isUndef())
17301     return SDValue();
17302 
17303   auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17304                              const RISCVSubtarget &Subtarget) {
17305     if (N->getOpcode() == ISD::ADD) {
17306       SDLoc DL(N);
17307       return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17308                                      Subtarget);
17309     }
17310     return std::make_pair(N->getOperand(3), N->getOperand(4));
17311   }(N, DAG, Subtarget);
17312 
17313   SDValue MulMask = MulOp.getOperand(3);
17314   SDValue MulVL = MulOp.getOperand(4);
17315 
17316   if (AddMask != MulMask || AddVL != MulVL)
17317     return SDValue();
17318 
17319   unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17320   static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17321                 "Unexpected opcode after VWMACC_VL");
17322   static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17323                 "Unexpected opcode after VWMACC_VL!");
17324   static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17325                 "Unexpected opcode after VWMUL_VL!");
17326   static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17327                 "Unexpected opcode after VWMUL_VL!");
17328 
17329   SDLoc DL(N);
17330   EVT VT = N->getValueType(0);
17331   SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17332                    AddVL};
17333   return DAG.getNode(Opc, DL, VT, Ops);
17334 }
17335 
17336 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17337                                            ISD::MemIndexType &IndexType,
17338                                            RISCVTargetLowering::DAGCombinerInfo &DCI) {
17339   if (!DCI.isBeforeLegalize())
17340     return false;
17341 
17342   SelectionDAG &DAG = DCI.DAG;
17343   const MVT XLenVT =
17344     DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17345 
17346   const EVT IndexVT = Index.getValueType();
17347 
17348   // RISC-V indexed loads only support the "unsigned unscaled" addressing
17349   // mode, so anything else must be manually legalized.
17350   if (!isIndexTypeSigned(IndexType))
17351     return false;
17352 
17353   if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17354     // Any index legalization should first promote to XLenVT, so we don't lose
17355     // bits when scaling. This may create an illegal index type so we let
17356     // LLVM's legalization take care of the splitting.
17357     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17358     Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17359                         IndexVT.changeVectorElementType(XLenVT), Index);
17360   }
17361   IndexType = ISD::UNSIGNED_SCALED;
17362   return true;
17363 }
17364 
17365 /// Match the index vector of a scatter or gather node as the shuffle mask
17366 /// which performs the rearrangement if possible.  Will only match if
17367 /// all lanes are touched, and thus replacing the scatter or gather with
17368 /// a unit strided access and shuffle is legal.
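      /// For example, an i32 gather with constant byte offsets (12, 8, 4, 0)
      /// touches every lane and corresponds to shuffle mask (3, 2, 1, 0) applied
      /// to a unit-strided load of the same 16 bytes.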
17369 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17370                                 SmallVector<int> &ShuffleMask) {
17371   if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17372     return false;
17373   if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17374     return false;
17375 
17376   const unsigned ElementSize = VT.getScalarStoreSize();
17377   const unsigned NumElems = VT.getVectorNumElements();
17378 
17379   // Create the shuffle mask and check all bits active
17380   assert(ShuffleMask.empty());
17381   BitVector ActiveLanes(NumElems);
17382   for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17383     // TODO: We've found an active bit of UB, and could be
17384     // more aggressive here if desired.
17385     if (Index->getOperand(i)->isUndef())
17386       return false;
17387     uint64_t C = Index->getConstantOperandVal(i);
17388     if (C % ElementSize != 0)
17389       return false;
17390     C = C / ElementSize;
17391     if (C >= NumElems)
17392       return false;
17393     ShuffleMask.push_back(C);
17394     ActiveLanes.set(C);
17395   }
17396   return ActiveLanes.all();
17397 }
17398 
17399 /// Match the index of a gather or scatter operation as an operation
17400 /// with twice the element width and half the number of elements.  This is
17401 /// generally profitable (if legal) because these operations are linear
17402 /// in VL, so even if we cause some extract VTYPE/VL toggles, we still
17403 /// come out ahead.
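      /// For example, an i32 gather with byte offsets (8, 12, 0, 4) can be
      /// treated as an i64 gather with byte offsets (8, 0): each even lane is
      /// aligned to the wider element size and the following odd lane continues
      /// it contiguously.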
17404 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17405                                 Align BaseAlign, const RISCVSubtarget &ST) {
17406   if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17407     return false;
17408   if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17409     return false;
17410 
17411   // Attempt a doubling.  If we can use an element type 4x or 8x in
17412   // size, this will happen via multiple iterations of the transform.
17413   const unsigned NumElems = VT.getVectorNumElements();
17414   if (NumElems % 2 != 0)
17415     return false;
17416 
17417   const unsigned ElementSize = VT.getScalarStoreSize();
17418   const unsigned WiderElementSize = ElementSize * 2;
17419   if (WiderElementSize > ST.getELen()/8)
17420     return false;
17421 
17422   if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17423     return false;
17424 
17425   for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17426     // TODO: We've found an active bit of UB, and could be
17427     // more aggressive here if desired.
17428     if (Index->getOperand(i)->isUndef())
17429       return false;
17430     // TODO: This offset check is too strict if we support fully
17431     // misaligned memory operations.
17432     uint64_t C = Index->getConstantOperandVal(i);
17433     if (i % 2 == 0) {
17434       if (C % WiderElementSize != 0)
17435         return false;
17436       continue;
17437     }
17438     uint64_t Last = Index->getConstantOperandVal(i-1);
17439     if (C != Last + ElementSize)
17440       return false;
17441   }
17442   return true;
17443 }
17444 
17445 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17446 // This benefits the cases where X and Y are both vectors of the same
17447 // low-precision value type. Since the truncate would be lowered into
17448 // n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
17449 // restriction, such a pattern would be expanded into a series of "vsetvli"
17450 // and "vnsrl" instructions later to reach this point.
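      // Clamping the shift amount with smin is what makes dropping the extends
      // safe: once the amount reaches scalarsize(X) - 1, the truncated result is
      // all copies of X's sign bit, so any larger amount gives the same value.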
17451 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17452   SDValue Mask = N->getOperand(1);
17453   SDValue VL = N->getOperand(2);
17454 
17455   bool IsVLMAX = isAllOnesConstant(VL) ||
17456                  (isa<RegisterSDNode>(VL) &&
17457                   cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17458   if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17459       Mask.getOperand(0) != VL)
17460     return SDValue();
17461 
17462   auto IsTruncNode = [&](SDValue V) {
17463     return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17464            V.getOperand(1) == Mask && V.getOperand(2) == VL;
17465   };
17466 
17467   SDValue Op = N->getOperand(0);
17468 
17469   // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17470   // to distinguish such pattern.
17471   while (IsTruncNode(Op)) {
17472     if (!Op.hasOneUse())
17473       return SDValue();
17474     Op = Op.getOperand(0);
17475   }
17476 
17477   if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17478     return SDValue();
17479 
17480   SDValue N0 = Op.getOperand(0);
17481   SDValue N1 = Op.getOperand(1);
17482   if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17483       N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17484     return SDValue();
17485 
17486   SDValue N00 = N0.getOperand(0);
17487   SDValue N10 = N1.getOperand(0);
17488   if (!N00.getValueType().isVector() ||
17489       N00.getValueType() != N10.getValueType() ||
17490       N->getValueType(0) != N10.getValueType())
17491     return SDValue();
17492 
17493   unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17494   SDValue SMin =
17495       DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17496                   DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17497   return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17498 }
17499 
17500 // Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17501 // maximum value for the truncated type.
17502 // Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17503 // is the signed maximum value for the truncated type and C2 is the signed
17504 // minimum value.
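// For example (illustrative), when truncating i16 elements to i8:
//   (truncate_vector_vl (umin X, 255))              -> (vnclipu_vl X)
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> (vnclip_vl X)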
17505 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17506                                     const RISCVSubtarget &Subtarget) {
17507   assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17508 
17509   MVT VT = N->getSimpleValueType(0);
17510 
17511   SDValue Mask = N->getOperand(1);
17512   SDValue VL = N->getOperand(2);
17513 
17514   auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17515                                   APInt &SplatVal) {
17516     if (V.getOpcode() != Opc &&
17517         !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17518           V.getOperand(3) == Mask && V.getOperand(4) == VL))
17519       return SDValue();
17520 
17521     SDValue Op = V.getOperand(1);
17522 
17523     // Peek through conversion between fixed and scalable vectors.
17524     if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17525         isNullConstant(Op.getOperand(2)) &&
17526         Op.getOperand(1).getValueType().isFixedLengthVector() &&
17527         Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17528         Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17529         isNullConstant(Op.getOperand(1).getOperand(1)))
17530       Op = Op.getOperand(1).getOperand(0);
17531 
17532     if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17533       return V.getOperand(0);
17534 
17535     if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17536         Op.getOperand(2) == VL) {
17537       if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17538         SplatVal =
17539             Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17540         return V.getOperand(0);
17541       }
17542     }
17543 
17544     return SDValue();
17545   };
17546 
17547   SDLoc DL(N);
17548 
17549   auto DetectUSatPattern = [&](SDValue V) {
17550     APInt LoC, HiC;
17551 
17552     // Simple case, V is a UMIN.
17553     if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17554       if (HiC.isMask(VT.getScalarSizeInBits()))
17555         return UMinOp;
17556 
17557     // If we have an SMAX that removes negative numbers first, then we can match
17558     // SMIN instead of UMIN.
17559     if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17560       if (SDValue SMaxOp =
17561               MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17562         if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17563           return SMinOp;
17564 
17565     // If we have an SMIN before an SMAX and the SMAX constant is less than or
17566     // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17567     // first.
17568     if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17569       if (SDValue SMinOp =
17570               MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17571         if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17572             HiC.uge(LoC))
17573           return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17574                              V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17575                              Mask, VL);
17576 
17577     return SDValue();
17578   };
17579 
17580   auto DetectSSatPattern = [&](SDValue V) {
17581     unsigned NumDstBits = VT.getScalarSizeInBits();
17582     unsigned NumSrcBits = V.getScalarValueSizeInBits();
17583     APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17584     APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17585 
17586     APInt HiC, LoC;
17587     if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17588       if (SDValue SMaxOp =
17589               MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17590         if (HiC == SignedMax && LoC == SignedMin)
17591           return SMaxOp;
17592 
17593     if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17594       if (SDValue SMinOp =
17595               MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17596         if (HiC == SignedMax && LoC == SignedMin)
17597           return SMinOp;
17598 
17599     return SDValue();
17600   };
17601 
17602   SDValue Src = N->getOperand(0);
17603 
17604   // Look through multiple layers of truncates.
17605   while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17606          Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17607          Src.hasOneUse())
17608     Src = Src.getOperand(0);
17609 
17610   SDValue Val;
17611   unsigned ClipOpc;
17612   if ((Val = DetectUSatPattern(Src)))
17613     ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17614   else if ((Val = DetectSSatPattern(Src)))
17615     ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17616   else
17617     return SDValue();
17618 
17619   MVT ValVT = Val.getSimpleValueType();
17620 
17621   do {
17622     MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17623     ValVT = ValVT.changeVectorElementType(ValEltVT);
17624     Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17625   } while (ValVT != VT);
17626 
17627   return Val;
17628 }
17629 
17630 // Convert
17631 //   (iX ctpop (bitcast (vXi1 A)))
17632 // ->
17633 //   (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17634 // FIXME: It's complicated to match all the variations of this after type
17635 // legalization so we only handle the pre-type legalization pattern, but that
17636 // requires the fixed vector type to be legal.
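// For example (illustrative): (i32 ctpop (bitcast (v8i1 A))) becomes a
// vcpop.m on the mask register holding A, followed by zero-extending or
// truncating the XLen-sized result to i32.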
17637 static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17638                                          const RISCVSubtarget &Subtarget) {
17639   EVT VT = N->getValueType(0);
17640   if (!VT.isScalarInteger())
17641     return SDValue();
17642 
17643   SDValue Src = N->getOperand(0);
17644 
17645   // Peek through zero_extend. It doesn't change the count.
17646   if (Src.getOpcode() == ISD::ZERO_EXTEND)
17647     Src = Src.getOperand(0);
17648 
17649   if (Src.getOpcode() != ISD::BITCAST)
17650     return SDValue();
17651 
17652   Src = Src.getOperand(0);
17653   EVT SrcEVT = Src.getValueType();
17654   if (!SrcEVT.isSimple())
17655     return SDValue();
17656 
17657   MVT SrcMVT = SrcEVT.getSimpleVT();
17658   // Make sure the input is an i1 vector.
17659   if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17660     return SDValue();
17661 
17662   if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17663     return SDValue();
17664 
17665   MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17666   Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17667 
17668   SDLoc DL(N);
17669   auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17670 
17671   MVT XLenVT = Subtarget.getXLenVT();
17672   SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17673   return DAG.getZExtOrTrunc(Pop, DL, VT);
17674 }
17675 
17676 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17677                                                DAGCombinerInfo &DCI) const {
17678   SelectionDAG &DAG = DCI.DAG;
17679   const MVT XLenVT = Subtarget.getXLenVT();
17680   SDLoc DL(N);
17681 
17682   // Helper to call SimplifyDemandedBits on an operand of N where only some low
17683   // bits are demanded. N will be added to the Worklist if it was not deleted.
17684   // Caller should return SDValue(N, 0) if this returns true.
17685   auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17686     SDValue Op = N->getOperand(OpNo);
17687     APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17688     if (!SimplifyDemandedBits(Op, Mask, DCI))
17689       return false;
17690 
17691     if (N->getOpcode() != ISD::DELETED_NODE)
17692       DCI.AddToWorklist(N);
17693     return true;
17694   };
17695 
17696   switch (N->getOpcode()) {
17697   default:
17698     break;
17699   case RISCVISD::SplitF64: {
17700     SDValue Op0 = N->getOperand(0);
17701     // If the input to SplitF64 is just BuildPairF64 then the operation is
17702     // redundant. Instead, use BuildPairF64's operands directly.
17703     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17704       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17705 
17706     if (Op0->isUndef()) {
17707       SDValue Lo = DAG.getUNDEF(MVT::i32);
17708       SDValue Hi = DAG.getUNDEF(MVT::i32);
17709       return DCI.CombineTo(N, Lo, Hi);
17710     }
17711 
17712     // It's cheaper to materialise two 32-bit integers than to load a double
17713     // from the constant pool and transfer it to integer registers through the
17714     // stack.
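    // For example (illustrative): splitting the f64 constant 1.0 (bit pattern
    // 0x3FF0000000000000) yields Lo = 0x0 and Hi = 0x3FF00000.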
17715     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17716       APInt V = C->getValueAPF().bitcastToAPInt();
17717       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17718       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17719       return DCI.CombineTo(N, Lo, Hi);
17720     }
17721 
17722     // This is a target-specific version of a DAGCombine performed in
17723     // DAGCombiner::visitBITCAST. It performs the equivalent of:
17724     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17725     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17726     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17727         !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17728       break;
17729     SDValue NewSplitF64 =
17730         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17731                     Op0.getOperand(0));
17732     SDValue Lo = NewSplitF64.getValue(0);
17733     SDValue Hi = NewSplitF64.getValue(1);
17734     APInt SignBit = APInt::getSignMask(32);
17735     if (Op0.getOpcode() == ISD::FNEG) {
17736       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17737                                   DAG.getConstant(SignBit, DL, MVT::i32));
17738       return DCI.CombineTo(N, Lo, NewHi);
17739     }
17740     assert(Op0.getOpcode() == ISD::FABS);
17741     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17742                                 DAG.getConstant(~SignBit, DL, MVT::i32));
17743     return DCI.CombineTo(N, Lo, NewHi);
17744   }
17745   case RISCVISD::SLLW:
17746   case RISCVISD::SRAW:
17747   case RISCVISD::SRLW:
17748   case RISCVISD::RORW:
17749   case RISCVISD::ROLW: {
17750     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17751     if (SimplifyDemandedLowBitsHelper(0, 32) ||
17752         SimplifyDemandedLowBitsHelper(1, 5))
17753       return SDValue(N, 0);
17754 
17755     break;
17756   }
17757   case RISCVISD::CLZW:
17758   case RISCVISD::CTZW: {
17759     // Only the lower 32 bits of the first operand are read.
17760     if (SimplifyDemandedLowBitsHelper(0, 32))
17761       return SDValue(N, 0);
17762     break;
17763   }
17764   case RISCVISD::FMV_W_X_RV64: {
17765     // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17766     // conversion is unnecessary and can be replaced with the
17767     // FMV_X_ANYEXTW_RV64 operand.
17768     SDValue Op0 = N->getOperand(0);
17769     if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17770       return Op0.getOperand(0);
17771     break;
17772   }
17773   case RISCVISD::FMV_X_ANYEXTH:
17774   case RISCVISD::FMV_X_ANYEXTW_RV64: {
17775     SDLoc DL(N);
17776     SDValue Op0 = N->getOperand(0);
17777     MVT VT = N->getSimpleValueType(0);
17778 
17779     // Constant fold.
17780     if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17781       APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17782       return DAG.getConstant(Val, DL, VT);
17783     }
17784 
17785     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17786     // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17787     // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17788     if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17789          Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17790         (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17791          Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17792       assert(Op0.getOperand(0).getValueType() == VT &&
17793              "Unexpected value type!");
17794       return Op0.getOperand(0);
17795     }
17796 
17797     if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17798         cast<LoadSDNode>(Op0)->isSimple()) {
17799       MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17800       auto *LN0 = cast<LoadSDNode>(Op0);
17801       SDValue Load =
17802           DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17803                          LN0->getBasePtr(), IVT, LN0->getMemOperand());
17804       DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17805       return Load;
17806     }
17807 
17808     // This is a target-specific version of a DAGCombine performed in
17809     // DAGCombiner::visitBITCAST. It performs the equivalent of:
17810     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17811     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17812     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17813         !Op0.getNode()->hasOneUse())
17814       break;
17815     SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17816     unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17817     APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17818     if (Op0.getOpcode() == ISD::FNEG)
17819       return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17820                          DAG.getConstant(SignBit, DL, VT));
17821 
17822     assert(Op0.getOpcode() == ISD::FABS);
17823     return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17824                        DAG.getConstant(~SignBit, DL, VT));
17825   }
17826   case ISD::ABS: {
17827     EVT VT = N->getValueType(0);
17828     SDValue N0 = N->getOperand(0);
17829     // abs (sext) -> zext (abs)
17830     // abs (zext) -> zext (handled elsewhere)
17831     if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17832       SDValue Src = N0.getOperand(0);
17833       SDLoc DL(N);
17834       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17835                          DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17836     }
17837     break;
17838   }
17839   case ISD::ADD: {
17840     if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17841       return V;
17842     if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17843       return V;
17844     return performADDCombine(N, DCI, Subtarget);
17845   }
17846   case ISD::SUB: {
17847     if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17848       return V;
17849     return performSUBCombine(N, DAG, Subtarget);
17850   }
17851   case ISD::AND:
17852     return performANDCombine(N, DCI, Subtarget);
17853   case ISD::OR: {
17854     if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17855       return V;
17856     return performORCombine(N, DCI, Subtarget);
17857   }
17858   case ISD::XOR:
17859     return performXORCombine(N, DAG, Subtarget);
17860   case ISD::MUL:
17861     if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17862       return V;
17863     return performMULCombine(N, DAG, DCI, Subtarget);
17864   case ISD::SDIV:
17865   case ISD::UDIV:
17866   case ISD::SREM:
17867   case ISD::UREM:
17868     if (SDValue V = combineBinOpOfZExt(N, DAG))
17869       return V;
17870     break;
17871   case ISD::FMUL: {
17872     // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17873     SDValue N0 = N->getOperand(0);
17874     SDValue N1 = N->getOperand(1);
17875     if (N0->getOpcode() != ISD::FCOPYSIGN)
17876       std::swap(N0, N1);
17877     if (N0->getOpcode() != ISD::FCOPYSIGN)
17878       return SDValue();
17879     ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17880     if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17881       return SDValue();
17882     EVT VT = N->getValueType(0);
17883     if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17884       return SDValue();
17885     SDValue Sign = N0->getOperand(1);
17886     if (Sign.getValueType() != VT)
17887       return SDValue();
17888     return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17889   }
17890   case ISD::FADD:
17891   case ISD::UMAX:
17892   case ISD::UMIN:
17893   case ISD::SMAX:
17894   case ISD::SMIN:
17895   case ISD::FMAXNUM:
17896   case ISD::FMINNUM: {
17897     if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17898       return V;
17899     if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17900       return V;
17901     return SDValue();
17902   }
17903   case ISD::SETCC:
17904     return performSETCCCombine(N, DAG, Subtarget);
17905   case ISD::SIGN_EXTEND_INREG:
17906     return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17907   case ISD::ZERO_EXTEND:
17908     // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17909     // type legalization. This is safe because fp_to_uint produces poison if
17910     // it overflows.
17911     if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17912       SDValue Src = N->getOperand(0);
17913       if (Src.getOpcode() == ISD::FP_TO_UINT &&
17914           isTypeLegal(Src.getOperand(0).getValueType()))
17915         return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17916                            Src.getOperand(0));
17917       if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17918           isTypeLegal(Src.getOperand(1).getValueType())) {
17919         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17920         SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17921                                   Src.getOperand(0), Src.getOperand(1));
17922         DCI.CombineTo(N, Res);
17923         DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17924         DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17925         return SDValue(N, 0); // Return N so it doesn't get rechecked.
17926       }
17927     }
17928     return SDValue();
17929   case RISCVISD::TRUNCATE_VECTOR_VL:
17930     if (SDValue V = combineTruncOfSraSext(N, DAG))
17931       return V;
17932     return combineTruncToVnclip(N, DAG, Subtarget);
17933   case ISD::TRUNCATE:
17934     return performTRUNCATECombine(N, DAG, Subtarget);
17935   case ISD::SELECT:
17936     return performSELECTCombine(N, DAG, Subtarget);
17937   case ISD::VSELECT:
17938     return performVSELECTCombine(N, DAG);
17939   case RISCVISD::CZERO_EQZ:
17940   case RISCVISD::CZERO_NEZ: {
17941     SDValue Val = N->getOperand(0);
17942     SDValue Cond = N->getOperand(1);
17943 
17944     unsigned Opc = N->getOpcode();
17945 
17946     // czero_eqz x, x -> x
17947     if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17948       return Val;
17949 
17950     unsigned InvOpc =
17951         Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17952 
17953     // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17954     // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17955     if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17956       SDValue NewCond = Cond.getOperand(0);
17957       APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17958       if (DAG.MaskedValueIsZero(NewCond, Mask))
17959         return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17960     }
17961     // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17962     // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17963     // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17964     // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17965     if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17966       ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17967       if (ISD::isIntEqualitySetCC(CCVal))
17968         return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17969                            N->getValueType(0), Val, Cond.getOperand(0));
17970     }
17971     return SDValue();
17972   }
17973   case RISCVISD::SELECT_CC: {
17974     // Try to transform this select_cc into simpler sequences below.
17975     SDValue LHS = N->getOperand(0);
17976     SDValue RHS = N->getOperand(1);
17977     SDValue CC = N->getOperand(2);
17978     ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17979     SDValue TrueV = N->getOperand(3);
17980     SDValue FalseV = N->getOperand(4);
17981     SDLoc DL(N);
17982     EVT VT = N->getValueType(0);
17983 
17984     // If the True and False values are the same, we don't need a select_cc.
17985     if (TrueV == FalseV)
17986       return TrueV;
17987 
17988     // (select (x < 0), y, z)  -> x >> (XLEN - 1) & (y - z) + z
17989     // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
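    // For example (illustrative), on RV64 with y = 5 and z = 3: when x < 0 the
    // shift yields all-ones, so (-1 & (5 - 3)) + 3 == 5; otherwise it yields
    // zero, so (0 & (5 - 3)) + 3 == 3.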
17990     if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17991         isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17992         (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17993       if (CCVal == ISD::CondCode::SETGE)
17994         std::swap(TrueV, FalseV);
17995 
17996       int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17997       int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17998       // Only handle simm12; if the constant is outside this range, it can be
17999       // treated as a register operand instead.
18000       if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
18001           isInt<12>(TrueSImm - FalseSImm)) {
18002         SDValue SRA =
18003             DAG.getNode(ISD::SRA, DL, VT, LHS,
18004                         DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
18005         SDValue AND =
18006             DAG.getNode(ISD::AND, DL, VT, SRA,
18007                         DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
18008         return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
18009       }
18010 
18011       if (CCVal == ISD::CondCode::SETGE)
18012         std::swap(TrueV, FalseV);
18013     }
18014 
18015     if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18016       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
18017                          {LHS, RHS, CC, TrueV, FalseV});
18018 
18019     if (!Subtarget.hasConditionalMoveFusion()) {
18020       // (select c, -1, y) -> -c | y
18021       if (isAllOnesConstant(TrueV)) {
18022         SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18023         SDValue Neg = DAG.getNegative(C, DL, VT);
18024         return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
18025       }
18026       // (select c, y, -1) -> -!c | y
18027       if (isAllOnesConstant(FalseV)) {
18028         SDValue C =
18029             DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18030         SDValue Neg = DAG.getNegative(C, DL, VT);
18031         return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
18032       }
18033 
18034       // (select c, 0, y) -> -!c & y
18035       if (isNullConstant(TrueV)) {
18036         SDValue C =
18037             DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18038         SDValue Neg = DAG.getNegative(C, DL, VT);
18039         return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
18040       }
18041       // (select c, y, 0) -> -c & y
18042       if (isNullConstant(FalseV)) {
18043         SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18044         SDValue Neg = DAG.getNegative(C, DL, VT);
18045         return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
18046       }
18047       // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
18048       // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
18049       if (((isOneConstant(FalseV) && LHS == TrueV &&
18050             CCVal == ISD::CondCode::SETNE) ||
18051            (isOneConstant(TrueV) && LHS == FalseV &&
18052             CCVal == ISD::CondCode::SETEQ)) &&
18053           isNullConstant(RHS)) {
18054         // LHS is used both by the setcc and the add, so freeze it to be safe.
18055         LHS = DAG.getFreeze(LHS);
18056         SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
18057         return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
18058       }
18059     }
18060 
18061     // If both true/false are an xor with 1, pull through the select.
18062     // This can occur after op legalization if both operands are setccs that
18063     // require an xor to invert.
18064     // FIXME: Generalize to other binary ops with identical operand?
18065     if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
18066         TrueV.getOperand(1) == FalseV.getOperand(1) &&
18067         isOneConstant(TrueV.getOperand(1)) &&
18068         TrueV.hasOneUse() && FalseV.hasOneUse()) {
18069       SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
18070                                    TrueV.getOperand(0), FalseV.getOperand(0));
18071       return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
18072     }
18073 
18074     return SDValue();
18075   }
18076   case RISCVISD::BR_CC: {
18077     SDValue LHS = N->getOperand(1);
18078     SDValue RHS = N->getOperand(2);
18079     SDValue CC = N->getOperand(3);
18080     SDLoc DL(N);
18081 
18082     if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18083       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
18084                          N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
18085 
18086     return SDValue();
18087   }
18088   case ISD::BITREVERSE:
18089     return performBITREVERSECombine(N, DAG, Subtarget);
18090   case ISD::FP_TO_SINT:
18091   case ISD::FP_TO_UINT:
18092     return performFP_TO_INTCombine(N, DCI, Subtarget);
18093   case ISD::FP_TO_SINT_SAT:
18094   case ISD::FP_TO_UINT_SAT:
18095     return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
18096   case ISD::FCOPYSIGN: {
18097     EVT VT = N->getValueType(0);
18098     if (!VT.isVector())
18099       break;
18100     // There is a form of VFSGNJ which injects the negated sign of its second
18101     // operand. Try to bubble any FNEG up after the extend/round to produce
18102     // this optimized pattern. Avoid modifying cases where the FP_ROUND has
18103     // its truncating flag set (TRUNC=1).
18104     SDValue In2 = N->getOperand(1);
18105     // Avoid cases where the extend/round has multiple uses, as duplicating
18106     // those is typically more expensive than removing a fneg.
18107     if (!In2.hasOneUse())
18108       break;
18109     if (In2.getOpcode() != ISD::FP_EXTEND &&
18110         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
18111       break;
18112     In2 = In2.getOperand(0);
18113     if (In2.getOpcode() != ISD::FNEG)
18114       break;
18115     SDLoc DL(N);
18116     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
18117     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
18118                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
18119   }
18120   case ISD::MGATHER: {
18121     const auto *MGN = cast<MaskedGatherSDNode>(N);
18122     const EVT VT = N->getValueType(0);
18123     SDValue Index = MGN->getIndex();
18124     SDValue ScaleOp = MGN->getScale();
18125     ISD::MemIndexType IndexType = MGN->getIndexType();
18126     assert(!MGN->isIndexScaled() &&
18127            "Scaled gather/scatter should not be formed");
18128 
18129     SDLoc DL(N);
18130     if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18131       return DAG.getMaskedGather(
18132           N->getVTList(), MGN->getMemoryVT(), DL,
18133           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18134            MGN->getBasePtr(), Index, ScaleOp},
18135           MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18136 
18137     if (narrowIndex(Index, IndexType, DAG))
18138       return DAG.getMaskedGather(
18139           N->getVTList(), MGN->getMemoryVT(), DL,
18140           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18141            MGN->getBasePtr(), Index, ScaleOp},
18142           MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18143 
18144     if (Index.getOpcode() == ISD::BUILD_VECTOR &&
18145         MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
18146       // The sequence will be XLenVT, not the type of Index. Tell
18147       // isSimpleVIDSequence this so we avoid overflow.
18148       if (std::optional<VIDSequence> SimpleVID =
18149               isSimpleVIDSequence(Index, Subtarget.getXLen());
18150           SimpleVID && SimpleVID->StepDenominator == 1) {
18151         const int64_t StepNumerator = SimpleVID->StepNumerator;
18152         const int64_t Addend = SimpleVID->Addend;
18153 
18154         // Note: We don't need to check alignment here since (by assumption
18155         // from the existence of the gather), our offsets must be sufficiently
18156         // aligned.
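        // For example (illustrative): a gather from base pointer p with
        // constant indices {0, 4, 8, 12} becomes a VP strided load from p
        // with stride 4, selected against the passthru under the gather's
        // mask.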
18157 
18158         const EVT PtrVT = getPointerTy(DAG.getDataLayout());
18159         assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18160         assert(IndexType == ISD::UNSIGNED_SCALED);
18161         SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
18162                                       DAG.getSignedConstant(Addend, DL, PtrVT));
18163 
18164         SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
18165                                           VT.getVectorElementCount());
18166         SDValue StridedLoad = DAG.getStridedLoadVP(
18167             VT, DL, MGN->getChain(), BasePtr,
18168             DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
18169             EVL, MGN->getMemOperand());
18170         SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
18171                                        StridedLoad, MGN->getPassThru(), EVL);
18172         return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
18173                                   DL);
18174       }
18175     }
18176 
18177     SmallVector<int> ShuffleMask;
18178     if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18179         matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
18180       SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
18181                                        MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
18182                                        MGN->getMask(), DAG.getUNDEF(VT),
18183                                        MGN->getMemoryVT(), MGN->getMemOperand(),
18184                                        ISD::UNINDEXED, ISD::NON_EXTLOAD);
18185       SDValue Shuffle =
18186         DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
18187       return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
18188     }
18189 
18190     if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18191         matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18192                             MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18193       SmallVector<SDValue> NewIndices;
18194       for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18195         NewIndices.push_back(Index.getOperand(i));
18196       EVT IndexVT = Index.getValueType()
18197         .getHalfNumVectorElementsVT(*DAG.getContext());
18198       Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
18199 
18200       unsigned ElementSize = VT.getScalarStoreSize();
18201       EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
18202       auto EltCnt = VT.getVectorElementCount();
18203       assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
18204       EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18205                                     EltCnt.divideCoefficientBy(2));
18206       SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18207       EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18208                                     EltCnt.divideCoefficientBy(2));
18209       SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
18210 
18211       SDValue Gather =
18212         DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
18213                             {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18214                              Index, ScaleOp},
18215                             MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
18216       SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18217       return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
18218     }
18219     break;
18220   }
18221   case ISD::MSCATTER: {
18222     const auto *MSN = cast<MaskedScatterSDNode>(N);
18223     SDValue Index = MSN->getIndex();
18224     SDValue ScaleOp = MSN->getScale();
18225     ISD::MemIndexType IndexType = MSN->getIndexType();
18226     assert(!MSN->isIndexScaled() &&
18227            "Scaled gather/scatter should not be formed");
18228 
18229     SDLoc DL(N);
18230     if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18231       return DAG.getMaskedScatter(
18232           N->getVTList(), MSN->getMemoryVT(), DL,
18233           {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18234            Index, ScaleOp},
18235           MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18236 
18237     if (narrowIndex(Index, IndexType, DAG))
18238       return DAG.getMaskedScatter(
18239           N->getVTList(), MSN->getMemoryVT(), DL,
18240           {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18241            Index, ScaleOp},
18242           MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18243 
18244     EVT VT = MSN->getValue()->getValueType(0);
18245     SmallVector<int> ShuffleMask;
18246     if (!MSN->isTruncatingStore() &&
18247         matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18248       SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
18249                                              DAG.getUNDEF(VT), ShuffleMask);
18250       return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18251                                 DAG.getUNDEF(XLenVT), MSN->getMask(),
18252                                 MSN->getMemoryVT(), MSN->getMemOperand(),
18253                                 ISD::UNINDEXED, false);
18254     }
18255     break;
18256   }
18257   case ISD::VP_GATHER: {
18258     const auto *VPGN = cast<VPGatherSDNode>(N);
18259     SDValue Index = VPGN->getIndex();
18260     SDValue ScaleOp = VPGN->getScale();
18261     ISD::MemIndexType IndexType = VPGN->getIndexType();
18262     assert(!VPGN->isIndexScaled() &&
18263            "Scaled gather/scatter should not be formed");
18264 
18265     SDLoc DL(N);
18266     if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18267       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18268                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
18269                               ScaleOp, VPGN->getMask(),
18270                               VPGN->getVectorLength()},
18271                              VPGN->getMemOperand(), IndexType);
18272 
18273     if (narrowIndex(Index, IndexType, DAG))
18274       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18275                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
18276                               ScaleOp, VPGN->getMask(),
18277                               VPGN->getVectorLength()},
18278                              VPGN->getMemOperand(), IndexType);
18279 
18280     break;
18281   }
18282   case ISD::VP_SCATTER: {
18283     const auto *VPSN = cast<VPScatterSDNode>(N);
18284     SDValue Index = VPSN->getIndex();
18285     SDValue ScaleOp = VPSN->getScale();
18286     ISD::MemIndexType IndexType = VPSN->getIndexType();
18287     assert(!VPSN->isIndexScaled() &&
18288            "Scaled gather/scatter should not be formed");
18289 
18290     SDLoc DL(N);
18291     if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18292       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18293                               {VPSN->getChain(), VPSN->getValue(),
18294                                VPSN->getBasePtr(), Index, ScaleOp,
18295                                VPSN->getMask(), VPSN->getVectorLength()},
18296                               VPSN->getMemOperand(), IndexType);
18297 
18298     if (narrowIndex(Index, IndexType, DAG))
18299       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18300                               {VPSN->getChain(), VPSN->getValue(),
18301                                VPSN->getBasePtr(), Index, ScaleOp,
18302                                VPSN->getMask(), VPSN->getVectorLength()},
18303                               VPSN->getMemOperand(), IndexType);
18304     break;
18305   }
18306   case RISCVISD::SHL_VL:
18307     if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18308       return V;
18309     [[fallthrough]];
18310   case RISCVISD::SRA_VL:
18311   case RISCVISD::SRL_VL: {
18312     SDValue ShAmt = N->getOperand(1);
18313     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18314       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18315       SDLoc DL(N);
18316       SDValue VL = N->getOperand(4);
18317       EVT VT = N->getValueType(0);
18318       ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18319                           ShAmt.getOperand(1), VL);
18320       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18321                          N->getOperand(2), N->getOperand(3), N->getOperand(4));
18322     }
18323     break;
18324   }
18325   case ISD::SRA:
18326     if (SDValue V = performSRACombine(N, DAG, Subtarget))
18327       return V;
18328     [[fallthrough]];
18329   case ISD::SRL:
18330   case ISD::SHL: {
18331     if (N->getOpcode() == ISD::SHL) {
18332       if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18333         return V;
18334     }
18335     SDValue ShAmt = N->getOperand(1);
18336     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18337       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18338       SDLoc DL(N);
18339       EVT VT = N->getValueType(0);
18340       ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18341                           ShAmt.getOperand(1),
18342                           DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18343       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18344     }
18345     break;
18346   }
18347   case RISCVISD::ADD_VL:
18348     if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18349       return V;
18350     return combineToVWMACC(N, DAG, Subtarget);
18351   case RISCVISD::VWADD_W_VL:
18352   case RISCVISD::VWADDU_W_VL:
18353   case RISCVISD::VWSUB_W_VL:
18354   case RISCVISD::VWSUBU_W_VL:
18355     return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18356   case RISCVISD::SUB_VL:
18357   case RISCVISD::MUL_VL:
18358     return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18359   case RISCVISD::VFMADD_VL:
18360   case RISCVISD::VFNMADD_VL:
18361   case RISCVISD::VFMSUB_VL:
18362   case RISCVISD::VFNMSUB_VL:
18363   case RISCVISD::STRICT_VFMADD_VL:
18364   case RISCVISD::STRICT_VFNMADD_VL:
18365   case RISCVISD::STRICT_VFMSUB_VL:
18366   case RISCVISD::STRICT_VFNMSUB_VL:
18367     return performVFMADD_VLCombine(N, DCI, Subtarget);
18368   case RISCVISD::FADD_VL:
18369   case RISCVISD::FSUB_VL:
18370   case RISCVISD::FMUL_VL:
18371   case RISCVISD::VFWADD_W_VL:
18372   case RISCVISD::VFWSUB_W_VL:
18373     return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18374   case ISD::LOAD:
18375   case ISD::STORE: {
18376     if (DCI.isAfterLegalizeDAG())
18377       if (SDValue V = performMemPairCombine(N, DCI))
18378         return V;
18379 
18380     if (N->getOpcode() != ISD::STORE)
18381       break;
18382 
18383     auto *Store = cast<StoreSDNode>(N);
18384     SDValue Chain = Store->getChain();
18385     EVT MemVT = Store->getMemoryVT();
18386     SDValue Val = Store->getValue();
18387     SDLoc DL(N);
18388 
18389     bool IsScalarizable =
18390         MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18391         Store->isSimple() &&
18392         MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18393         isPowerOf2_64(MemVT.getSizeInBits()) &&
18394         MemVT.getSizeInBits() <= Subtarget.getXLen();
18395 
18396     // If sufficiently aligned we can scalarize stores of constant vectors of
18397     // any power-of-two size up to XLen bits, provided that they aren't too
18398     // expensive to materialize.
18399     //   vsetivli   zero, 2, e8, m1, ta, ma
18400     //   vmv.v.i    v8, 4
18401     //   vse64.v    v8, (a0)
18402     // ->
18403     //   li     a1, 1028
18404     //   sh     a1, 0(a0)
18405     if (DCI.isBeforeLegalize() && IsScalarizable &&
18406         ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18407       // Get the constant vector bits
18408       APInt NewC(Val.getValueSizeInBits(), 0);
18409       uint64_t EltSize = Val.getScalarValueSizeInBits();
18410       for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18411         if (Val.getOperand(i).isUndef())
18412           continue;
18413         NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18414                         i * EltSize);
18415       }
18416       MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18417 
18418       if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18419                                      true) <= 2 &&
18420           allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18421                                          NewVT, *Store->getMemOperand())) {
18422         SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18423         return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18424                             Store->getPointerInfo(), Store->getOriginalAlign(),
18425                             Store->getMemOperand()->getFlags());
18426       }
18427     }
18428 
18429     // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18430     //   vsetivli   zero, 2, e16, m1, ta, ma
18431     //   vle16.v    v8, (a0)
18432     //   vse16.v    v8, (a1)
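    // ->
    //   lw     a2, 0(a0)
    //   sw     a2, 0(a1)
    // (Illustrative; assumes the access is sufficiently aligned for the
    // scalar i32 load/store.)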
18433     if (auto *L = dyn_cast<LoadSDNode>(Val);
18434         L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18435         L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18436         Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18437         L->getMemoryVT() == MemVT) {
18438       MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18439       if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18440                                          NewVT, *Store->getMemOperand()) &&
18441           allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18442                                          NewVT, *L->getMemOperand())) {
18443         SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18444                                    L->getPointerInfo(), L->getOriginalAlign(),
18445                                    L->getMemOperand()->getFlags());
18446         return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18447                             Store->getPointerInfo(), Store->getOriginalAlign(),
18448                             Store->getMemOperand()->getFlags());
18449       }
18450     }
18451 
18452     // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18453     // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18454     // any illegal types.
18455     if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18456         (DCI.isAfterLegalizeDAG() &&
18457          Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18458          isNullConstant(Val.getOperand(1)))) {
18459       SDValue Src = Val.getOperand(0);
18460       MVT VecVT = Src.getSimpleValueType();
18461       // VecVT should be scalable and memory VT should match the element type.
18462       if (!Store->isIndexed() && VecVT.isScalableVector() &&
18463           MemVT == VecVT.getVectorElementType()) {
18464         SDLoc DL(N);
18465         MVT MaskVT = getMaskTypeFor(VecVT);
18466         return DAG.getStoreVP(
18467             Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18468             DAG.getConstant(1, DL, MaskVT),
18469             DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18470             Store->getMemOperand(), Store->getAddressingMode(),
18471             Store->isTruncatingStore(), /*IsCompress*/ false);
18472       }
18473     }
18474 
18475     break;
18476   }
18477   case ISD::SPLAT_VECTOR: {
18478     EVT VT = N->getValueType(0);
18479     // Only perform this combine on legal MVT types.
18480     if (!isTypeLegal(VT))
18481       break;
18482     if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18483                                          DAG, Subtarget))
18484       return Gather;
18485     break;
18486   }
18487   case ISD::BUILD_VECTOR:
18488     if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18489       return V;
18490     break;
18491   case ISD::CONCAT_VECTORS:
18492     if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18493       return V;
18494     break;
18495   case ISD::VECTOR_SHUFFLE:
18496     if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
18497       return V;
18498     break;
18499   case ISD::INSERT_VECTOR_ELT:
18500     if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18501       return V;
18502     break;
18503   case RISCVISD::VFMV_V_F_VL: {
18504     const MVT VT = N->getSimpleValueType(0);
18505     SDValue Passthru = N->getOperand(0);
18506     SDValue Scalar = N->getOperand(1);
18507     SDValue VL = N->getOperand(2);
18508 
18509     // If VL is 1, we can use vfmv.s.f.
18510     if (isOneConstant(VL))
18511       return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18512     break;
18513   }
18514   case RISCVISD::VMV_V_X_VL: {
18515     const MVT VT = N->getSimpleValueType(0);
18516     SDValue Passthru = N->getOperand(0);
18517     SDValue Scalar = N->getOperand(1);
18518     SDValue VL = N->getOperand(2);
18519 
18520     // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18521     // scalar input.
18522     unsigned ScalarSize = Scalar.getValueSizeInBits();
18523     unsigned EltWidth = VT.getScalarSizeInBits();
18524     if (ScalarSize > EltWidth && Passthru.isUndef())
18525       if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18526         return SDValue(N, 0);
18527 
18528     // If VL is 1 and the scalar value won't benefit from an immediate form
18529     // (vmv.v.i), we can use vmv.s.x instead.
18530     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18531     if (isOneConstant(VL) &&
18532         (!Const || Const->isZero() ||
18533          !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18534       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18535 
18536     break;
18537   }
18538   case RISCVISD::VFMV_S_F_VL: {
18539     SDValue Src = N->getOperand(1);
18540     // Try to remove vector->scalar->vector if the scalar->vector is inserting
18541     // into an undef vector.
18542     // TODO: Could use a vslide or vmv.v.v for non-undef.
18543     if (N->getOperand(0).isUndef() &&
18544         Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18545         isNullConstant(Src.getOperand(1)) &&
18546         Src.getOperand(0).getValueType().isScalableVector()) {
18547       EVT VT = N->getValueType(0);
18548       EVT SrcVT = Src.getOperand(0).getValueType();
18549       assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
18550       // Widths match, just return the original vector.
18551       if (SrcVT == VT)
18552         return Src.getOperand(0);
18553       // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18554     }
18555     [[fallthrough]];
18556   }
18557   case RISCVISD::VMV_S_X_VL: {
18558     const MVT VT = N->getSimpleValueType(0);
18559     SDValue Passthru = N->getOperand(0);
18560     SDValue Scalar = N->getOperand(1);
18561     SDValue VL = N->getOperand(2);
18562 
18563     if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18564         Scalar.getOperand(0).getValueType() == N->getValueType(0))
18565       return Scalar.getOperand(0);
18566 
18567     // Use M1 or smaller to avoid over-constraining register allocation.
18568     const MVT M1VT = getLMUL1VT(VT);
18569     if (M1VT.bitsLT(VT)) {
18570       SDValue M1Passthru =
18571           DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18572                       DAG.getVectorIdxConstant(0, DL));
18573       SDValue Result =
18574           DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18575       Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18576                            DAG.getVectorIdxConstant(0, DL));
18577       return Result;
18578     }
18579 
18580     // We use a vmv.v.i if possible.  We limit this to LMUL1.  LMUL2 or
18581     // higher would involve overly constraining the register allocator for
18582     // no purpose.
18583     if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18584         Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18585         VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18586       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18587 
18588     break;
18589   }
18590   case RISCVISD::VMV_X_S: {
18591     SDValue Vec = N->getOperand(0);
18592     MVT VecVT = N->getOperand(0).getSimpleValueType();
18593     const MVT M1VT = getLMUL1VT(VecVT);
18594     if (M1VT.bitsLT(VecVT)) {
18595       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18596                         DAG.getVectorIdxConstant(0, DL));
18597       return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18598     }
18599     break;
18600   }
18601   case ISD::INTRINSIC_VOID:
18602   case ISD::INTRINSIC_W_CHAIN:
18603   case ISD::INTRINSIC_WO_CHAIN: {
18604     unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18605     unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18606     switch (IntNo) {
18607       // By default we do not combine any intrinsic.
18608     default:
18609       return SDValue();
18610     case Intrinsic::riscv_vcpop:
18611     case Intrinsic::riscv_vcpop_mask:
18612     case Intrinsic::riscv_vfirst:
18613     case Intrinsic::riscv_vfirst_mask: {
18614       SDValue VL = N->getOperand(2);
18615       if (IntNo == Intrinsic::riscv_vcpop_mask ||
18616           IntNo == Intrinsic::riscv_vfirst_mask)
18617         VL = N->getOperand(3);
18618       if (!isNullConstant(VL))
18619         return SDValue();
18620       // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18621       SDLoc DL(N);
18622       EVT VT = N->getValueType(0);
18623       if (IntNo == Intrinsic::riscv_vfirst ||
18624           IntNo == Intrinsic::riscv_vfirst_mask)
18625         return DAG.getAllOnesConstant(DL, VT);
18626       return DAG.getConstant(0, DL, VT);
18627     }
18628     }
18629   }
18630   case ISD::EXPERIMENTAL_VP_REVERSE:
18631     return performVP_REVERSECombine(N, DAG, Subtarget);
18632   case ISD::VP_STORE:
18633     return performVP_STORECombine(N, DAG, Subtarget);
18634   case ISD::BITCAST: {
18635     assert(Subtarget.useRVVForFixedLengthVectors());
18636     SDValue N0 = N->getOperand(0);
18637     EVT VT = N->getValueType(0);
18638     EVT SrcVT = N0.getValueType();
18639     if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18640       unsigned NF = VT.getRISCVVectorTupleNumFields();
18641       unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18642       SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18643       MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18644 
18645       SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18646 
18647       SDValue Result = DAG.getUNDEF(VT);
18648       for (unsigned i = 0; i < NF; ++i)
18649         Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18650                              DAG.getVectorIdxConstant(i, DL));
18651       return Result;
18652     }
18653     // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18654     // type, widen both sides to avoid a trip through memory.
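    // For example (illustrative): a bitcast of v4i1 to i4 is widened by
    // concatenating with an undef v4i1 to form v8i1, bitcasting that to i8,
    // and truncating the result back to i4.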
18655     if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18656         VT.isScalarInteger()) {
18657       unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18658       SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18659       Ops[0] = N0;
18660       SDLoc DL(N);
18661       N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18662       N0 = DAG.getBitcast(MVT::i8, N0);
18663       return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18664     }
18665 
18666     return SDValue();
18667   }
18668   case ISD::CTPOP:
18669     if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18670       return V;
18671     break;
18672   }
18673 
18674   return SDValue();
18675 }
18676 
18677 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18678     EVT XVT, unsigned KeptBits) const {
18679   // For vectors, we don't have a preference.
18680   if (XVT.isVector())
18681     return false;
18682 
18683   if (XVT != MVT::i32 && XVT != MVT::i64)
18684     return false;
18685 
18686   // We can use sext.w for RV64 or an srai 31 on RV32.
18687   if (KeptBits == 32 || KeptBits == 64)
18688     return true;
18689 
18690   // With Zbb we can use sext.h/sext.b.
18691   return Subtarget.hasStdExtZbb() &&
18692          ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18693           KeptBits == 16);
18694 }
18695 
18696 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18697     const SDNode *N, CombineLevel Level) const {
18698   assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18699           N->getOpcode() == ISD::SRL) &&
18700          "Expected shift op");
18701 
18702   // The following folds are only desirable if `(OP _, c1 << c2)` can be
18703   // materialised in fewer instructions than `(OP _, c1)`:
18704   //
18705   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18706   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
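  // For example (illustrative): for (shl (add x, 1), 30) the shifted constant
  // 1 << 30 no longer fits in an ADDI immediate while 1 does, so the fold is
  // rejected; for (shl (add x, 1), 3) both 1 and 8 fit in an ADDI immediate,
  // so the constant-cost check below does not block the fold.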
18707   SDValue N0 = N->getOperand(0);
18708   EVT Ty = N0.getValueType();
18709 
18710   // LD/ST addressing can fold constant offsets, so when the ADD node is also
18711   // used by a LD/ST, the folding optimization performed above can still be
18712   // completed there.
18713   auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18714     for (SDNode *Use : X->users()) {
18715       // This use is the one we're on right now. Skip it.
18716       if (Use == User || Use->getOpcode() == ISD::SELECT)
18717         continue;
18718       if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18719         return false;
18720     }
18721     return true;
18722   };
18723 
18724   if (Ty.isScalarInteger() &&
18725       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18726     if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18727       return isUsedByLdSt(N0.getNode(), N);
18728 
18729     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18730     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18731 
18732     // Bail if we might break a sh{1,2,3}add pattern.
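          // For example, with Zba, (add (shl (add X, C1), 2), Y) can be selected
          // as an ADDI feeding a SH2ADD. Commuting the shift through the inner add
          // would instead yield (add (add (shl X, 2), C1 << 2), Y), which
          // typically takes an extra instruction.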
18733     if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18734         C2->getZExtValue() <= 3 && N->hasOneUse() &&
18735         N->user_begin()->getOpcode() == ISD::ADD &&
18736         !isUsedByLdSt(*N->user_begin(), nullptr) &&
18737         !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18738       return false;
18739 
18740     if (C1 && C2) {
18741       const APInt &C1Int = C1->getAPIntValue();
18742       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18743 
18744       // We can materialise `c1 << c2` into an add immediate, so it's "free",
18745       // and the combine should happen, to potentially allow further combines
18746       // later.
18747       if (ShiftedC1Int.getSignificantBits() <= 64 &&
18748           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18749         return true;
18750 
18751       // We can materialise `c1` in an add immediate, so it's "free", and the
18752       // combine should be prevented.
18753       if (C1Int.getSignificantBits() <= 64 &&
18754           isLegalAddImmediate(C1Int.getSExtValue()))
18755         return false;
18756 
18757       // Neither constant will fit into an immediate, so find materialisation
18758       // costs.
18759       int C1Cost =
18760           RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18761                                      /*CompressionCost*/ true);
18762       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18763           ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18764           /*CompressionCost*/ true);
18765 
18766       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18767       // combine should be prevented.
18768       if (C1Cost < ShiftedC1Cost)
18769         return false;
18770     }
18771   }
18772 
18773   if (!N0->hasOneUse())
18774     return false;
18775 
18776   if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18777       N0->getOperand(0)->getOpcode() == ISD::ADD &&
18778       !N0->getOperand(0)->hasOneUse())
18779     return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18780 
18781   return true;
18782 }
18783 
18784 bool RISCVTargetLowering::targetShrinkDemandedConstant(
18785     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18786     TargetLoweringOpt &TLO) const {
18787   // Delay this optimization as late as possible.
18788   if (!TLO.LegalOps)
18789     return false;
18790 
18791   EVT VT = Op.getValueType();
18792   if (VT.isVector())
18793     return false;
18794 
18795   unsigned Opcode = Op.getOpcode();
18796   if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18797     return false;
18798 
18799   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18800   if (!C)
18801     return false;
18802 
18803   const APInt &Mask = C->getAPIntValue();
18804 
18805   // Clear all non-demanded bits initially.
18806   APInt ShrunkMask = Mask & DemandedBits;
18807 
18808   // Try to make a smaller immediate by setting undemanded bits.
18809 
18810   APInt ExpandedMask = Mask | ~DemandedBits;
18811 
18812   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18813     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18814   };
18815   auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18816     if (NewMask == Mask)
18817       return true;
18818     SDLoc DL(Op);
18819     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18820     SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18821                                     Op.getOperand(0), NewC);
18822     return TLO.CombineTo(Op, NewOp);
18823   };
18824 
18825   // If the shrunk mask fits in sign extended 12 bits, let the target
18826   // independent code apply it.
18827   if (ShrunkMask.isSignedIntN(12))
18828     return false;
18829 
18830   // AND has a few special cases for zext.
18831   if (Opcode == ISD::AND) {
18832     // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18833     // otherwise use SLLI + SRLI.
18834     APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18835     if (IsLegalMask(NewMask))
18836       return UseMask(NewMask);
18837 
18838     // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18839     if (VT == MVT::i64) {
18840       APInt NewMask = APInt(64, 0xffffffff);
18841       if (IsLegalMask(NewMask))
18842         return UseMask(NewMask);
18843     }
18844   }
18845 
18846   // For the remaining optimizations, we need to be able to make a negative
18847   // number through a combination of mask and undemanded bits.
18848   if (!ExpandedMask.isNegative())
18849     return false;
18850 
18851   // Compute the fewest number of bits needed to represent the negative number.
18852   unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18853 
18854   // Try to make a 12 bit negative immediate. If that fails try to make a 32
18855   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18856   // If we can't create a simm12, we shouldn't change opaque constants.
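        // For example, on RV64, if only the low 16 bits of (and X, 0xff00) are
        // demanded, the mask can be widened to 0xffffffffffffff00, which agrees on
        // every demanded bit and fits a 12-bit immediate (andi X, -256).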
18857   APInt NewMask = ShrunkMask;
18858   if (MinSignedBits <= 12)
18859     NewMask.setBitsFrom(11);
18860   else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18861     NewMask.setBitsFrom(31);
18862   else
18863     return false;
18864 
18865   // Check that our new mask is a subset of the demanded mask.
18866   assert(IsLegalMask(NewMask));
18867   return UseMask(NewMask);
18868 }
18869 
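      // Evaluate the generalized reverse (GREV) or generalized OR-combine (GORC)
      // permutation of a 64-bit value for the given control value ShAmt. With a
      // control value of 7 this matches brev8 (reverse the bits within each byte)
      // and orc.b (each result byte is 0xff if any bit of the source byte is set,
      // and 0 otherwise). For example, orc.b maps 0x0000010000ab0000 to
      // 0x0000ff0000ff0000.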
18870 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18871   static const uint64_t GREVMasks[] = {
18872       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18873       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18874 
18875   for (unsigned Stage = 0; Stage != 6; ++Stage) {
18876     unsigned Shift = 1 << Stage;
18877     if (ShAmt & Shift) {
18878       uint64_t Mask = GREVMasks[Stage];
18879       uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18880       if (IsGORC)
18881         Res |= x;
18882       x = Res;
18883     }
18884   }
18885 
18886   return x;
18887 }
18888 
18889 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18890                                                         KnownBits &Known,
18891                                                         const APInt &DemandedElts,
18892                                                         const SelectionDAG &DAG,
18893                                                         unsigned Depth) const {
18894   unsigned BitWidth = Known.getBitWidth();
18895   unsigned Opc = Op.getOpcode();
18896   assert((Opc >= ISD::BUILTIN_OP_END ||
18897           Opc == ISD::INTRINSIC_WO_CHAIN ||
18898           Opc == ISD::INTRINSIC_W_CHAIN ||
18899           Opc == ISD::INTRINSIC_VOID) &&
18900          "Should use MaskedValueIsZero if you don't know whether Op"
18901          " is a target node!");
18902 
18903   Known.resetAll();
18904   switch (Opc) {
18905   default: break;
18906   case RISCVISD::SELECT_CC: {
18907     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18908     // If we don't know any bits, early out.
18909     if (Known.isUnknown())
18910       break;
18911     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18912 
18913     // Only known if known in both the LHS and RHS.
18914     Known = Known.intersectWith(Known2);
18915     break;
18916   }
18917   case RISCVISD::CZERO_EQZ:
18918   case RISCVISD::CZERO_NEZ:
18919     Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18920     // Result is either all zero or operand 0. We can propagate zeros, but not
18921     // ones.
18922     Known.One.clearAllBits();
18923     break;
18924   case RISCVISD::REMUW: {
18925     KnownBits Known2;
18926     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18927     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18928     // We only care about the lower 32 bits.
18929     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18930     // Restore the original width by sign extending.
18931     Known = Known.sext(BitWidth);
18932     break;
18933   }
18934   case RISCVISD::DIVUW: {
18935     KnownBits Known2;
18936     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18937     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18938     // We only care about the lower 32 bits.
18939     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18940     // Restore the original width by sign extending.
18941     Known = Known.sext(BitWidth);
18942     break;
18943   }
18944   case RISCVISD::SLLW: {
18945     KnownBits Known2;
18946     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18947     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18948     Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18949     // Restore the original width by sign extending.
18950     Known = Known.sext(BitWidth);
18951     break;
18952   }
18953   case RISCVISD::CTZW: {
18954     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18955     unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18956     unsigned LowBits = llvm::bit_width(PossibleTZ);
18957     Known.Zero.setBitsFrom(LowBits);
18958     break;
18959   }
18960   case RISCVISD::CLZW: {
18961     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18962     unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18963     unsigned LowBits = llvm::bit_width(PossibleLZ);
18964     Known.Zero.setBitsFrom(LowBits);
18965     break;
18966   }
18967   case RISCVISD::BREV8:
18968   case RISCVISD::ORC_B: {
18969     // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18970     // control value of 7 is equivalent to brev8 and orc.b.
18971     Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18972     bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18973     // To compute zeros, we need to invert the value and invert it back after.
18974     Known.Zero =
18975         ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18976     Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18977     break;
18978   }
18979   case RISCVISD::READ_VLENB: {
18980     // We can use the minimum and maximum VLEN values to bound VLENB.  We
18981     // know VLEN must be a power of two.
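          // For example, if VLEN is known to be exactly 128, then VLENB is 16:
          // bits 0-3 and bits 5 and above are known zero and bit 4 is known one.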
18982     const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18983     const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18984     assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18985     Known.Zero.setLowBits(Log2_32(MinVLenB));
18986     Known.Zero.setBitsFrom(Log2_32(MaxVLenB) + 1);
18987     if (MaxVLenB == MinVLenB)
18988       Known.One.setBit(Log2_32(MinVLenB));
18989     break;
18990   }
18991   case RISCVISD::FCLASS: {
18992     // fclass will only set one of the low 10 bits.
18993     Known.Zero.setBitsFrom(10);
18994     break;
18995   }
18996   case ISD::INTRINSIC_W_CHAIN:
18997   case ISD::INTRINSIC_WO_CHAIN: {
18998     unsigned IntNo =
18999         Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
19000     switch (IntNo) {
19001     default:
19002       // We can't do anything for most intrinsics.
19003       break;
19004     case Intrinsic::riscv_vsetvli:
19005     case Intrinsic::riscv_vsetvlimax: {
19006       bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
19007       unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
19008       RISCVII::VLMUL VLMUL =
19009           static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
19010       unsigned SEW = RISCVVType::decodeVSEW(VSEW);
19011       auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
19012       uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
19013       MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
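            // For example, with a maximum VLEN of 512, SEW=8 and LMUL=8, the
            // largest possible VL is 512 / 8 * 8 = 512, so bits 10 and above of
            // the result are known zero.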
19014 
19015       // The result of vsetvli must not be larger than AVL.
19016       if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
19017         MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
19018 
19019       unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
19020       if (BitWidth > KnownZeroFirstBit)
19021         Known.Zero.setBitsFrom(KnownZeroFirstBit);
19022       break;
19023     }
19024     }
19025     break;
19026   }
19027   }
19028 }
19029 
19030 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
19031     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19032     unsigned Depth) const {
19033   switch (Op.getOpcode()) {
19034   default:
19035     break;
19036   case RISCVISD::SELECT_CC: {
19037     unsigned Tmp =
19038         DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
19039     if (Tmp == 1) return 1;  // Early out.
19040     unsigned Tmp2 =
19041         DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
19042     return std::min(Tmp, Tmp2);
19043   }
19044   case RISCVISD::CZERO_EQZ:
19045   case RISCVISD::CZERO_NEZ:
19046     // Output is either all zero or operand 0. We can propagate sign bit count
19047     // from operand 0.
19048     return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19049   case RISCVISD::ABSW: {
19050     // We expand this at isel to negw+max. The result will have 33 sign bits
19051     // if the input has at least 33 sign bits.
19052     unsigned Tmp =
19053         DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19054     if (Tmp < 33) return 1;
19055     return 33;
19056   }
19057   case RISCVISD::SLLW:
19058   case RISCVISD::SRAW:
19059   case RISCVISD::SRLW:
19060   case RISCVISD::DIVW:
19061   case RISCVISD::DIVUW:
19062   case RISCVISD::REMUW:
19063   case RISCVISD::ROLW:
19064   case RISCVISD::RORW:
19065   case RISCVISD::FCVT_W_RV64:
19066   case RISCVISD::FCVT_WU_RV64:
19067   case RISCVISD::STRICT_FCVT_W_RV64:
19068   case RISCVISD::STRICT_FCVT_WU_RV64:
19069     // TODO: As the result is sign-extended, this is conservatively correct. A
19070     // more precise answer could be calculated for SRAW depending on known
19071     // bits in the shift amount.
19072     return 33;
19073   case RISCVISD::VMV_X_S: {
19074     // The number of sign bits of the scalar result is computed by obtaining the
19075     // element type of the input vector operand, subtracting its width from the
19076     // XLEN, and then adding one (sign bit within the element type). If the
19077     // element type is wider than XLen, the least-significant XLEN bits are
19078     // taken.
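          // For example, extracting an i8 element on RV64 gives at least
          // 64 - 8 + 1 = 57 sign bits.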
19079     unsigned XLen = Subtarget.getXLen();
19080     unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
19081     if (EltBits <= XLen)
19082       return XLen - EltBits + 1;
19083     break;
19084   }
19085   case ISD::INTRINSIC_W_CHAIN: {
19086     unsigned IntNo = Op.getConstantOperandVal(1);
19087     switch (IntNo) {
19088     default:
19089       break;
19090     case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
19091     case Intrinsic::riscv_masked_atomicrmw_add_i64:
19092     case Intrinsic::riscv_masked_atomicrmw_sub_i64:
19093     case Intrinsic::riscv_masked_atomicrmw_nand_i64:
19094     case Intrinsic::riscv_masked_atomicrmw_max_i64:
19095     case Intrinsic::riscv_masked_atomicrmw_min_i64:
19096     case Intrinsic::riscv_masked_atomicrmw_umax_i64:
19097     case Intrinsic::riscv_masked_atomicrmw_umin_i64:
19098     case Intrinsic::riscv_masked_cmpxchg_i64:
19099       // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
19100       // narrow atomic operation. These are implemented using atomic
19101       // operations at the minimum supported atomicrmw/cmpxchg width whose
19102       // result is then sign extended to XLEN. With +A, the minimum width is
19103       // 32 for both RV64 and RV32.
19104       assert(Subtarget.getXLen() == 64);
19105       assert(getMinCmpXchgSizeInBits() == 32);
19106       assert(Subtarget.hasStdExtA());
19107       return 33;
19108     }
19109     break;
19110   }
19111   }
19112 
19113   return 1;
19114 }
19115 
19116 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
19117     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19118     bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
19119 
19120   // TODO: Add more target nodes.
19121   switch (Op.getOpcode()) {
19122   case RISCVISD::SELECT_CC:
19123     // Integer select_cc cannot create poison.
19124     // TODO: What are the FP poison semantics?
19125     // TODO: This instruction blocks poison from the unselected operand; can
19126     // we do anything with that?
19127     return !Op.getValueType().isInteger();
19128   }
19129   return TargetLowering::canCreateUndefOrPoisonForTargetNode(
19130       Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
19131 }
19132 
19133 const Constant *
19134 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
19135   assert(Ld && "Unexpected null LoadSDNode");
19136   if (!ISD::isNormalLoad(Ld))
19137     return nullptr;
19138 
19139   SDValue Ptr = Ld->getBasePtr();
19140 
19141   // Only constant pools with no offset are supported.
19142   auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
19143     auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
19144     if (!CNode || CNode->isMachineConstantPoolEntry() ||
19145         CNode->getOffset() != 0)
19146       return nullptr;
19147 
19148     return CNode;
19149   };
19150 
19151   // Simple case, LLA.
19152   if (Ptr.getOpcode() == RISCVISD::LLA) {
19153     auto *CNode = GetSupportedConstantPool(Ptr);
19154     if (!CNode || CNode->getTargetFlags() != 0)
19155       return nullptr;
19156 
19157     return CNode->getConstVal();
19158   }
19159 
19160   // Look for a HI and ADD_LO pair.
19161   if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
19162       Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
19163     return nullptr;
19164 
19165   auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
19166   auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
19167 
19168   if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
19169       !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
19170     return nullptr;
19171 
19172   if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
19173     return nullptr;
19174 
19175   return CNodeLo->getConstVal();
19176 }
19177 
19178 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
19179                                                     MachineBasicBlock *BB) {
19180   assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
19181 
19182   // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
19183   // Should the count have wrapped while it was being read, we need to try
19184   // again.
19185   // For example:
19186   // ```
19187   // read:
19188   //   csrrs x3, counterh # load high word of counter
19189   //   csrrs x2, counter # load low word of counter
19190   //   csrrs x4, counterh # load high word of counter
19191   //   bne x3, x4, read # check if high word reads match, otherwise try again
19192   // ```
19193 
19194   MachineFunction &MF = *BB->getParent();
19195   const BasicBlock *LLVMBB = BB->getBasicBlock();
19196   MachineFunction::iterator It = ++BB->getIterator();
19197 
19198   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19199   MF.insert(It, LoopMBB);
19200 
19201   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19202   MF.insert(It, DoneMBB);
19203 
19204   // Transfer the remainder of BB and its successor edges to DoneMBB.
19205   DoneMBB->splice(DoneMBB->begin(), BB,
19206                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
19207   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
19208 
19209   BB->addSuccessor(LoopMBB);
19210 
19211   MachineRegisterInfo &RegInfo = MF.getRegInfo();
19212   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19213   Register LoReg = MI.getOperand(0).getReg();
19214   Register HiReg = MI.getOperand(1).getReg();
19215   int64_t LoCounter = MI.getOperand(2).getImm();
19216   int64_t HiCounter = MI.getOperand(3).getImm();
19217   DebugLoc DL = MI.getDebugLoc();
19218 
19219   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
19220   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
19221       .addImm(HiCounter)
19222       .addReg(RISCV::X0);
19223   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
19224       .addImm(LoCounter)
19225       .addReg(RISCV::X0);
19226   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
19227       .addImm(HiCounter)
19228       .addReg(RISCV::X0);
19229 
19230   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
19231       .addReg(HiReg)
19232       .addReg(ReadAgainReg)
19233       .addMBB(LoopMBB);
19234 
19235   LoopMBB->addSuccessor(LoopMBB);
19236   LoopMBB->addSuccessor(DoneMBB);
19237 
19238   MI.eraseFromParent();
19239 
19240   return DoneMBB;
19241 }
19242 
19243 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
19244                                              MachineBasicBlock *BB,
19245                                              const RISCVSubtarget &Subtarget) {
19246   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
19247 
19248   MachineFunction &MF = *BB->getParent();
19249   DebugLoc DL = MI.getDebugLoc();
19250   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
19251   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
19252   Register LoReg = MI.getOperand(0).getReg();
19253   Register HiReg = MI.getOperand(1).getReg();
19254   Register SrcReg = MI.getOperand(2).getReg();
19255 
19256   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
19257   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19258 
19259   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19260                           RI, Register());
19261   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
19262   MachineMemOperand *MMOLo =
19263       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
19264   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
19265       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
19266   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19267       .addFrameIndex(FI)
19268       .addImm(0)
19269       .addMemOperand(MMOLo);
19270   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19271       .addFrameIndex(FI)
19272       .addImm(4)
19273       .addMemOperand(MMOHi);
19274   MI.eraseFromParent(); // The pseudo instruction is gone now.
19275   return BB;
19276 }
19277 
19278 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
19279                                                  MachineBasicBlock *BB,
19280                                                  const RISCVSubtarget &Subtarget) {
19281   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19282          "Unexpected instruction");
19283 
19284   MachineFunction &MF = *BB->getParent();
19285   DebugLoc DL = MI.getDebugLoc();
19286   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
19287   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
19288   Register DstReg = MI.getOperand(0).getReg();
19289   Register LoReg = MI.getOperand(1).getReg();
19290   Register HiReg = MI.getOperand(2).getReg();
19291 
19292   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
19293   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19294 
19295   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
19296   MachineMemOperand *MMOLo =
19297       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
19298   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
19299       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
19300   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19301       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19302       .addFrameIndex(FI)
19303       .addImm(0)
19304       .addMemOperand(MMOLo);
19305   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19306       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19307       .addFrameIndex(FI)
19308       .addImm(4)
19309       .addMemOperand(MMOHi);
19310   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19311   MI.eraseFromParent(); // The pseudo instruction is gone now.
19312   return BB;
19313 }
19314 
19315 static bool isSelectPseudo(MachineInstr &MI) {
19316   switch (MI.getOpcode()) {
19317   default:
19318     return false;
19319   case RISCV::Select_GPR_Using_CC_GPR:
19320   case RISCV::Select_GPR_Using_CC_Imm:
19321   case RISCV::Select_FPR16_Using_CC_GPR:
19322   case RISCV::Select_FPR16INX_Using_CC_GPR:
19323   case RISCV::Select_FPR32_Using_CC_GPR:
19324   case RISCV::Select_FPR32INX_Using_CC_GPR:
19325   case RISCV::Select_FPR64_Using_CC_GPR:
19326   case RISCV::Select_FPR64INX_Using_CC_GPR:
19327   case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19328     return true;
19329   }
19330 }
19331 
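      // Emit the expansion of a PseudoQuietFLT_*/PseudoQuietFLE_* pseudo. FLT and
      // FLE are signaling comparisons, so to get quiet semantics we save FFLAGS,
      // issue the relational compare, restore FFLAGS to drop any spurious invalid
      // flag, and then issue an FEQ to x0 so that signaling NaN operands still
      // raise the invalid exception.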
19332 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
19333                                         unsigned RelOpcode, unsigned EqOpcode,
19334                                         const RISCVSubtarget &Subtarget) {
19335   DebugLoc DL = MI.getDebugLoc();
19336   Register DstReg = MI.getOperand(0).getReg();
19337   Register Src1Reg = MI.getOperand(1).getReg();
19338   Register Src2Reg = MI.getOperand(2).getReg();
19339   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19340   Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19341   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
19342 
19343   // Save the current FFLAGS.
19344   BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19345 
19346   auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19347                  .addReg(Src1Reg)
19348                  .addReg(Src2Reg);
19349   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19350     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19351 
19352   // Restore the FFLAGS.
19353   BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19354       .addReg(SavedFFlags, RegState::Kill);
19355 
19356   // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
19357   auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19358                   .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19359                   .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19360   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19361     MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
19362 
19363   // Erase the pseudoinstruction.
19364   MI.eraseFromParent();
19365   return BB;
19366 }
19367 
19368 static MachineBasicBlock *
19369 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
19370                           MachineBasicBlock *ThisMBB,
19371                           const RISCVSubtarget &Subtarget) {
19372   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19373   // Without this, custom-inserter would have generated:
19374   //
19375   //   A
19376   //   | \
19377   //   |  B
19378   //   | /
19379   //   C
19380   //   | \
19381   //   |  D
19382   //   | /
19383   //   E
19384   //
19385   // A: X = ...; Y = ...
19386   // B: empty
19387   // C: Z = PHI [X, A], [Y, B]
19388   // D: empty
19389   // E: PHI [X, C], [Z, D]
19390   //
19391   // If we lower both Select_FPRX_ in a single step, we can instead generate:
19392   //
19393   //   A
19394   //   | \
19395   //   |  C
19396   //   | /|
19397   //   |/ |
19398   //   |  |
19399   //   |  D
19400   //   | /
19401   //   E
19402   //
19403   // A: X = ...; Y = ...
19404   // D: empty
19405   // E: PHI [X, A], [X, C], [Y, D]
19406 
19407   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19408   const DebugLoc &DL = First.getDebugLoc();
19409   const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19410   MachineFunction *F = ThisMBB->getParent();
19411   MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19412   MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19413   MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19414   MachineFunction::iterator It = ++ThisMBB->getIterator();
19415   F->insert(It, FirstMBB);
19416   F->insert(It, SecondMBB);
19417   F->insert(It, SinkMBB);
19418 
19419   // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19420   SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19421                   std::next(MachineBasicBlock::iterator(First)),
19422                   ThisMBB->end());
19423   SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19424 
19425   // Fallthrough block for ThisMBB.
19426   ThisMBB->addSuccessor(FirstMBB);
19427   // Fallthrough block for FirstMBB.
19428   FirstMBB->addSuccessor(SecondMBB);
19429   ThisMBB->addSuccessor(SinkMBB);
19430   FirstMBB->addSuccessor(SinkMBB);
19431   // This is fallthrough.
19432   SecondMBB->addSuccessor(SinkMBB);
19433 
19434   auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19435   Register FLHS = First.getOperand(1).getReg();
19436   Register FRHS = First.getOperand(2).getReg();
19437   // Insert appropriate branch.
19438   BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19439       .addReg(FLHS)
19440       .addReg(FRHS)
19441       .addMBB(SinkMBB);
19442 
19443   Register SLHS = Second.getOperand(1).getReg();
19444   Register SRHS = Second.getOperand(2).getReg();
19445   Register Op1Reg4 = First.getOperand(4).getReg();
19446   Register Op1Reg5 = First.getOperand(5).getReg();
19447 
19448   auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19449   // Insert appropriate branch.
19450   BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19451       .addReg(SLHS)
19452       .addReg(SRHS)
19453       .addMBB(SinkMBB);
19454 
19455   Register DestReg = Second.getOperand(0).getReg();
19456   Register Op2Reg4 = Second.getOperand(4).getReg();
19457   BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19458       .addReg(Op2Reg4)
19459       .addMBB(ThisMBB)
19460       .addReg(Op1Reg4)
19461       .addMBB(FirstMBB)
19462       .addReg(Op1Reg5)
19463       .addMBB(SecondMBB);
19464 
19465   // Now remove the Select_FPRX_s.
19466   First.eraseFromParent();
19467   Second.eraseFromParent();
19468   return SinkMBB;
19469 }
19470 
19471 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19472                                            MachineBasicBlock *BB,
19473                                            const RISCVSubtarget &Subtarget) {
19474   // To "insert" Select_* instructions, we actually have to insert the triangle
19475   // control-flow pattern.  The incoming instructions know the destination vreg
19476   // to set, the condition code register to branch on, the true/false values to
19477   // select between, and the condcode to use to select the appropriate branch.
19478   //
19479   // We produce the following control flow:
19480   //     HeadMBB
19481   //     |  \
19482   //     |  IfFalseMBB
19483   //     | /
19484   //    TailMBB
19485   //
19486   // When we find a sequence of selects we attempt to optimize their emission
19487   // by sharing the control flow. Currently we only handle cases where we have
19488   // multiple selects with the exact same condition (same LHS, RHS and CC).
19489   // The selects may be interleaved with other instructions if the other
19490   // instructions meet some requirements we deem safe:
19491   // - They are not pseudo instructions.
19492   // - They are debug instructions, or otherwise
19493   // - They do not have side-effects, do not access memory and their inputs do
19494   //   not depend on the results of the select pseudo-instructions.
19495   // The TrueV/FalseV operands of the selects cannot depend on the result of
19496   // previous selects in the sequence.
19497   // These conditions could be further relaxed. See the X86 target for a
19498   // related approach and more information.
19499   //
19500   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19501   // is checked here and handled by a separate function -
19502   // EmitLoweredCascadedSelect.
19503 
19504   auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19505   if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19506        MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19507       Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19508       Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19509       Next->getOperand(5).isKill())
19510     return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19511 
19512   Register LHS = MI.getOperand(1).getReg();
19513   Register RHS;
19514   if (MI.getOperand(2).isReg())
19515     RHS = MI.getOperand(2).getReg();
19516   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19517 
19518   SmallVector<MachineInstr *, 4> SelectDebugValues;
19519   SmallSet<Register, 4> SelectDests;
19520   SelectDests.insert(MI.getOperand(0).getReg());
19521 
19522   MachineInstr *LastSelectPseudo = &MI;
19523   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19524        SequenceMBBI != E; ++SequenceMBBI) {
19525     if (SequenceMBBI->isDebugInstr())
19526       continue;
19527     if (isSelectPseudo(*SequenceMBBI)) {
19528       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19529           !SequenceMBBI->getOperand(2).isReg() ||
19530           SequenceMBBI->getOperand(2).getReg() != RHS ||
19531           SequenceMBBI->getOperand(3).getImm() != CC ||
19532           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19533           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19534         break;
19535       LastSelectPseudo = &*SequenceMBBI;
19536       SequenceMBBI->collectDebugValues(SelectDebugValues);
19537       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19538       continue;
19539     }
19540     if (SequenceMBBI->hasUnmodeledSideEffects() ||
19541         SequenceMBBI->mayLoadOrStore() ||
19542         SequenceMBBI->usesCustomInsertionHook())
19543       break;
19544     if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19545           return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19546         }))
19547       break;
19548   }
19549 
19550   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19551   const BasicBlock *LLVM_BB = BB->getBasicBlock();
19552   DebugLoc DL = MI.getDebugLoc();
19553   MachineFunction::iterator I = ++BB->getIterator();
19554 
19555   MachineBasicBlock *HeadMBB = BB;
19556   MachineFunction *F = BB->getParent();
19557   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19558   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19559 
19560   F->insert(I, IfFalseMBB);
19561   F->insert(I, TailMBB);
19562 
19563   // Set the call frame size on entry to the new basic blocks.
19564   unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19565   IfFalseMBB->setCallFrameSize(CallFrameSize);
19566   TailMBB->setCallFrameSize(CallFrameSize);
19567 
19568   // Transfer debug instructions associated with the selects to TailMBB.
19569   for (MachineInstr *DebugInstr : SelectDebugValues) {
19570     TailMBB->push_back(DebugInstr->removeFromParent());
19571   }
19572 
19573   // Move all instructions after the sequence to TailMBB.
19574   TailMBB->splice(TailMBB->end(), HeadMBB,
19575                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19576   // Update machine-CFG edges by transferring all successors of the current
19577   // block to the new block which will contain the Phi nodes for the selects.
19578   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19579   // Set the successors for HeadMBB.
19580   HeadMBB->addSuccessor(IfFalseMBB);
19581   HeadMBB->addSuccessor(TailMBB);
19582 
19583   // Insert appropriate branch.
19584   if (MI.getOperand(2).isImm())
19585     BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19586         .addReg(LHS)
19587         .addImm(MI.getOperand(2).getImm())
19588         .addMBB(TailMBB);
19589   else
19590     BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19591         .addReg(LHS)
19592         .addReg(RHS)
19593         .addMBB(TailMBB);
19594 
19595   // IfFalseMBB just falls through to TailMBB.
19596   IfFalseMBB->addSuccessor(TailMBB);
19597 
19598   // Create PHIs for all of the select pseudo-instructions.
19599   auto SelectMBBI = MI.getIterator();
19600   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19601   auto InsertionPoint = TailMBB->begin();
19602   while (SelectMBBI != SelectEnd) {
19603     auto Next = std::next(SelectMBBI);
19604     if (isSelectPseudo(*SelectMBBI)) {
19605       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19606       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19607               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19608           .addReg(SelectMBBI->getOperand(4).getReg())
19609           .addMBB(HeadMBB)
19610           .addReg(SelectMBBI->getOperand(5).getReg())
19611           .addMBB(IfFalseMBB);
19612       SelectMBBI->eraseFromParent();
19613     }
19614     SelectMBBI = Next;
19615   }
19616 
19617   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19618   return TailMBB;
19619 }
19620 
19621 // Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19622 static const RISCV::RISCVMaskedPseudoInfo *
19623 lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19624   const RISCVVInversePseudosTable::PseudoInfo *Inverse =
19625       RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19626   assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19627   const RISCV::RISCVMaskedPseudoInfo *Masked =
19628       RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19629   assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19630   return Masked;
19631 }
19632 
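      // Emit the expansion of a PseudoVFROUND_NOEXCEPT_V_*_MASK pseudo: round the
      // active elements to integer values in floating-point format by converting
      // to integer and back (vfcvt.x.f.v followed by vfcvt.f.x.v) under the
      // dynamic rounding mode, saving and restoring FFLAGS around the sequence so
      // the conversions do not leave any exception flags set.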
19633 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19634                                                     MachineBasicBlock *BB,
19635                                                     unsigned CVTXOpc) {
19636   DebugLoc DL = MI.getDebugLoc();
19637 
19638   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
19639 
19640   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19641   Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19642 
19643   // Save the old value of FFLAGS.
19644   BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19645 
19646   assert(MI.getNumOperands() == 7);
19647 
19648   // Emit a VFCVT_X_F
19649   const TargetRegisterInfo *TRI =
19650       BB->getParent()->getSubtarget().getRegisterInfo();
19651   const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19652   Register Tmp = MRI.createVirtualRegister(RC);
19653   BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19654       .add(MI.getOperand(1))
19655       .add(MI.getOperand(2))
19656       .add(MI.getOperand(3))
19657       .add(MachineOperand::CreateImm(7)) // frm = DYN
19658       .add(MI.getOperand(4))
19659       .add(MI.getOperand(5))
19660       .add(MI.getOperand(6))
19661       .add(MachineOperand::CreateReg(RISCV::FRM,
19662                                      /*IsDef*/ false,
19663                                      /*IsImp*/ true));
19664 
19665   // Emit a VFCVT_F_X
19666   RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19667   unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19668   // There is no E8 variant for VFCVT_F_X.
19669   assert(Log2SEW >= 4);
19670   unsigned CVTFOpc =
19671       lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19672           ->MaskedPseudo;
19673 
19674   BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19675       .add(MI.getOperand(0))
19676       .add(MI.getOperand(1))
19677       .addReg(Tmp)
19678       .add(MI.getOperand(3))
19679       .add(MachineOperand::CreateImm(7)) // frm = DYN
19680       .add(MI.getOperand(4))
19681       .add(MI.getOperand(5))
19682       .add(MI.getOperand(6))
19683       .add(MachineOperand::CreateReg(RISCV::FRM,
19684                                      /*IsDef*/ false,
19685                                      /*IsImp*/ true));
19686 
19687   // Restore FFLAGS.
19688   BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19689       .addReg(SavedFFLAGS, RegState::Kill);
19690 
19691   // Erase the pseudoinstruction.
19692   MI.eraseFromParent();
19693   return BB;
19694 }
19695 
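      // Emit the expansion of a PseudoFROUND_* pseudo. Inputs that do not compare
      // less in magnitude than the bound in operand 2 (including NaNs) are passed
      // through unchanged; everything else is rounded by converting to an integer
      // and back with the requested rounding mode and re-attaching the sign of the
      // input. Roughly, for the single-precision case (register names are
      // illustrative):
      // ```
      //   fsgnjx.s ftmp, fsrc, fsrc     # ftmp = |fsrc|
      //   flt.s    t0, ftmp, fmax
      //   beq      t0, zero, done       # skip the conversion if |fsrc| >= fmax
      //   fcvt.w.s t1, fsrc, rm
      //   fcvt.s.w ftmp, t1, rm
      //   fsgnj.s  fres, ftmp, fsrc     # restore the sign of the input
      // done:
      //   fdst = phi(fsrc, fres)
      // ```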
19696 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19697                                      const RISCVSubtarget &Subtarget) {
19698   unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19699   const TargetRegisterClass *RC;
19700   switch (MI.getOpcode()) {
19701   default:
19702     llvm_unreachable("Unexpected opcode");
19703   case RISCV::PseudoFROUND_H:
19704     CmpOpc = RISCV::FLT_H;
19705     F2IOpc = RISCV::FCVT_W_H;
19706     I2FOpc = RISCV::FCVT_H_W;
19707     FSGNJOpc = RISCV::FSGNJ_H;
19708     FSGNJXOpc = RISCV::FSGNJX_H;
19709     RC = &RISCV::FPR16RegClass;
19710     break;
19711   case RISCV::PseudoFROUND_H_INX:
19712     CmpOpc = RISCV::FLT_H_INX;
19713     F2IOpc = RISCV::FCVT_W_H_INX;
19714     I2FOpc = RISCV::FCVT_H_W_INX;
19715     FSGNJOpc = RISCV::FSGNJ_H_INX;
19716     FSGNJXOpc = RISCV::FSGNJX_H_INX;
19717     RC = &RISCV::GPRF16RegClass;
19718     break;
19719   case RISCV::PseudoFROUND_S:
19720     CmpOpc = RISCV::FLT_S;
19721     F2IOpc = RISCV::FCVT_W_S;
19722     I2FOpc = RISCV::FCVT_S_W;
19723     FSGNJOpc = RISCV::FSGNJ_S;
19724     FSGNJXOpc = RISCV::FSGNJX_S;
19725     RC = &RISCV::FPR32RegClass;
19726     break;
19727   case RISCV::PseudoFROUND_S_INX:
19728     CmpOpc = RISCV::FLT_S_INX;
19729     F2IOpc = RISCV::FCVT_W_S_INX;
19730     I2FOpc = RISCV::FCVT_S_W_INX;
19731     FSGNJOpc = RISCV::FSGNJ_S_INX;
19732     FSGNJXOpc = RISCV::FSGNJX_S_INX;
19733     RC = &RISCV::GPRF32RegClass;
19734     break;
19735   case RISCV::PseudoFROUND_D:
19736     assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19737     CmpOpc = RISCV::FLT_D;
19738     F2IOpc = RISCV::FCVT_L_D;
19739     I2FOpc = RISCV::FCVT_D_L;
19740     FSGNJOpc = RISCV::FSGNJ_D;
19741     FSGNJXOpc = RISCV::FSGNJX_D;
19742     RC = &RISCV::FPR64RegClass;
19743     break;
19744   case RISCV::PseudoFROUND_D_INX:
19745     assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19746     CmpOpc = RISCV::FLT_D_INX;
19747     F2IOpc = RISCV::FCVT_L_D_INX;
19748     I2FOpc = RISCV::FCVT_D_L_INX;
19749     FSGNJOpc = RISCV::FSGNJ_D_INX;
19750     FSGNJXOpc = RISCV::FSGNJX_D_INX;
19751     RC = &RISCV::GPRRegClass;
19752     break;
19753   }
19754 
19755   const BasicBlock *BB = MBB->getBasicBlock();
19756   DebugLoc DL = MI.getDebugLoc();
19757   MachineFunction::iterator I = ++MBB->getIterator();
19758 
19759   MachineFunction *F = MBB->getParent();
19760   MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19761   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19762 
19763   F->insert(I, CvtMBB);
19764   F->insert(I, DoneMBB);
19765   // Move all instructions after the sequence to DoneMBB.
19766   DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19767                   MBB->end());
19768   // Update machine-CFG edges by transferring all successors of the current
19769   // block to the new block which will contain the Phi nodes for the selects.
19770   DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
19771   // Set the successors for MBB.
19772   MBB->addSuccessor(CvtMBB);
19773   MBB->addSuccessor(DoneMBB);
19774 
19775   Register DstReg = MI.getOperand(0).getReg();
19776   Register SrcReg = MI.getOperand(1).getReg();
19777   Register MaxReg = MI.getOperand(2).getReg();
19778   int64_t FRM = MI.getOperand(3).getImm();
19779 
19780   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19781   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
19782 
19783   Register FabsReg = MRI.createVirtualRegister(RC);
19784   BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19785 
19786   // Compare the FP value to the max value.
19787   Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19788   auto MIB =
19789       BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19790   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19791     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19792 
19793   // Insert branch.
19794   BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19795       .addReg(CmpReg)
19796       .addReg(RISCV::X0)
19797       .addMBB(DoneMBB);
19798 
19799   CvtMBB->addSuccessor(DoneMBB);
19800 
19801   // Convert to integer.
19802   Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19803   MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19804   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19805     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19806 
19807   // Convert back to FP.
19808   Register I2FReg = MRI.createVirtualRegister(RC);
19809   MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19810   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19811     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19812 
19813   // Restore the sign bit.
19814   Register CvtReg = MRI.createVirtualRegister(RC);
19815   BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19816 
19817   // Merge the results.
19818   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19819       .addReg(SrcReg)
19820       .addMBB(MBB)
19821       .addReg(CvtReg)
19822       .addMBB(CvtMBB);
19823 
19824   MI.eraseFromParent();
19825   return DoneMBB;
19826 }
19827 
19828 MachineBasicBlock *
19829 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19830                                                  MachineBasicBlock *BB) const {
19831   switch (MI.getOpcode()) {
19832   default:
19833     llvm_unreachable("Unexpected instr type to insert");
19834   case RISCV::ReadCounterWide:
19835     assert(!Subtarget.is64Bit() &&
19836            "ReadCounterWide is only to be used on riscv32");
19837     return emitReadCounterWidePseudo(MI, BB);
19838   case RISCV::Select_GPR_Using_CC_GPR:
19839   case RISCV::Select_GPR_Using_CC_Imm:
19840   case RISCV::Select_FPR16_Using_CC_GPR:
19841   case RISCV::Select_FPR16INX_Using_CC_GPR:
19842   case RISCV::Select_FPR32_Using_CC_GPR:
19843   case RISCV::Select_FPR32INX_Using_CC_GPR:
19844   case RISCV::Select_FPR64_Using_CC_GPR:
19845   case RISCV::Select_FPR64INX_Using_CC_GPR:
19846   case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19847     return emitSelectPseudo(MI, BB, Subtarget);
19848   case RISCV::BuildPairF64Pseudo:
19849     return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19850   case RISCV::SplitF64Pseudo:
19851     return emitSplitF64Pseudo(MI, BB, Subtarget);
19852   case RISCV::PseudoQuietFLE_H:
19853     return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19854   case RISCV::PseudoQuietFLE_H_INX:
19855     return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19856   case RISCV::PseudoQuietFLT_H:
19857     return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19858   case RISCV::PseudoQuietFLT_H_INX:
19859     return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19860   case RISCV::PseudoQuietFLE_S:
19861     return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19862   case RISCV::PseudoQuietFLE_S_INX:
19863     return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19864   case RISCV::PseudoQuietFLT_S:
19865     return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19866   case RISCV::PseudoQuietFLT_S_INX:
19867     return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19868   case RISCV::PseudoQuietFLE_D:
19869     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19870   case RISCV::PseudoQuietFLE_D_INX:
19871     return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19872   case RISCV::PseudoQuietFLE_D_IN32X:
19873     return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19874                          Subtarget);
19875   case RISCV::PseudoQuietFLT_D:
19876     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19877   case RISCV::PseudoQuietFLT_D_INX:
19878     return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19879   case RISCV::PseudoQuietFLT_D_IN32X:
19880     return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19881                          Subtarget);
19882 
19883   case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19884     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19885   case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19886     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19887   case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19888     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19889   case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19890     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19891   case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19892     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19893   case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19894     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19895   case RISCV::PseudoFROUND_H:
19896   case RISCV::PseudoFROUND_H_INX:
19897   case RISCV::PseudoFROUND_S:
19898   case RISCV::PseudoFROUND_S_INX:
19899   case RISCV::PseudoFROUND_D:
19900   case RISCV::PseudoFROUND_D_INX:
19901   case RISCV::PseudoFROUND_D_IN32X:
19902     return emitFROUND(MI, BB, Subtarget);
19903   case RISCV::PROBED_STACKALLOC_DYN:
19904     return emitDynamicProbedAlloc(MI, BB);
19905   case TargetOpcode::STATEPOINT:
19906     // STATEPOINT is a pseudo instruction which has no implicit defs/uses
19907     // while jal call instruction (where statepoint will be lowered at the end)
19908     // has implicit def. This def is early-clobber as it will be set at
19909     // the moment of the call and earlier than any use is read.
19910     // Add this implicit dead def here as a workaround.
19911     MI.addOperand(*MI.getMF(),
19912                   MachineOperand::CreateReg(
19913                       RISCV::X1, /*isDef*/ true,
19914                       /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19915                       /*isUndef*/ false, /*isEarlyClobber*/ true));
19916     [[fallthrough]];
19917   case TargetOpcode::STACKMAP:
19918   case TargetOpcode::PATCHPOINT:
19919     if (!Subtarget.is64Bit())
19920       report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19921                          "supported on 64-bit targets");
19922     return emitPatchPoint(MI, BB);
19923   }
19924 }
19925 
19926 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19927                                                         SDNode *Node) const {
19928   // Add FRM dependency to any instructions with dynamic rounding mode.
19929   int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19930   if (Idx < 0) {
19931     // Vector pseudos have FRM index indicated by TSFlags.
19932     Idx = RISCVII::getFRMOpNum(MI.getDesc());
19933     if (Idx < 0)
19934       return;
19935   }
19936   if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19937     return;
19938   // If the instruction already reads FRM, don't add another read.
19939   if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19940     return;
19941   MI.addOperand(
19942       MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19943 }
19944 
19945 void RISCVTargetLowering::analyzeInputArgs(
19946     MachineFunction &MF, CCState &CCInfo,
19947     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19948     RISCVCCAssignFn Fn) const {
19949   unsigned NumArgs = Ins.size();
19950   FunctionType *FType = MF.getFunction().getFunctionType();
19951 
19952   for (unsigned i = 0; i != NumArgs; ++i) {
19953     MVT ArgVT = Ins[i].VT;
19954     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19955 
19956     Type *ArgTy = nullptr;
19957     if (IsRet)
19958       ArgTy = FType->getReturnType();
19959     else if (Ins[i].isOrigArg())
19960       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19961 
19962     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19963            /*IsFixed=*/true, IsRet, ArgTy)) {
19964       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19965                         << ArgVT << '\n');
19966       llvm_unreachable(nullptr);
19967     }
19968   }
19969 }
19970 
19971 void RISCVTargetLowering::analyzeOutputArgs(
19972     MachineFunction &MF, CCState &CCInfo,
19973     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19974     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19975   unsigned NumArgs = Outs.size();
19976 
19977   for (unsigned i = 0; i != NumArgs; i++) {
19978     MVT ArgVT = Outs[i].VT;
19979     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19980     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19981 
19982     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19983            Outs[i].IsFixed, IsRet, OrigTy)) {
19984       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19985                         << ArgVT << "\n");
19986       llvm_unreachable(nullptr);
19987     }
19988   }
19989 }
19990 
19991 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19992 // values.
19993 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19994                                    const CCValAssign &VA, const SDLoc &DL,
19995                                    const RISCVSubtarget &Subtarget) {
19996   if (VA.needsCustom()) {
19997     if (VA.getLocVT().isInteger() &&
19998         (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19999       return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
20000     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
20001       return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
20002     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
20003       return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
20004     llvm_unreachable("Unexpected Custom handling.");
20005   }
20006 
20007   switch (VA.getLocInfo()) {
20008   default:
20009     llvm_unreachable("Unexpected CCValAssign::LocInfo");
20010   case CCValAssign::Full:
20011     break;
20012   case CCValAssign::BCvt:
20013     Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
20014     break;
20015   }
20016   return Val;
20017 }
20018 
20019 // The caller is responsible for loading the full value if the argument is
20020 // passed with CCValAssign::Indirect.
20021 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
20022                                 const CCValAssign &VA, const SDLoc &DL,
20023                                 const ISD::InputArg &In,
20024                                 const RISCVTargetLowering &TLI) {
20025   MachineFunction &MF = DAG.getMachineFunction();
20026   MachineRegisterInfo &RegInfo = MF.getRegInfo();
20027   EVT LocVT = VA.getLocVT();
20028   SDValue Val;
20029   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
20030   Register VReg = RegInfo.createVirtualRegister(RC);
20031   RegInfo.addLiveIn(VA.getLocReg(), VReg);
20032   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
20033 
20034   // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
20035   if (In.isOrigArg()) {
20036     Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
20037     if (OrigArg->getType()->isIntegerTy()) {
20038       unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
20039       // An input zero extended from i31 can also be considered sign extended.
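      // For example, an i16 argument zero extended to XLEN has bit 31 clear,
      // so it is also a valid sign-extended 32-bit value.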
20040       if ((BitWidth <= 32 && In.Flags.isSExt()) ||
20041           (BitWidth < 32 && In.Flags.isZExt())) {
20042         RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
20043         RVFI->addSExt32Register(VReg);
20044       }
20045     }
20046   }
20047 
20048   if (VA.getLocInfo() == CCValAssign::Indirect)
20049     return Val;
20050 
20051   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
20052 }
20053 
20054 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
20055                                    const CCValAssign &VA, const SDLoc &DL,
20056                                    const RISCVSubtarget &Subtarget) {
20057   EVT LocVT = VA.getLocVT();
20058 
20059   if (VA.needsCustom()) {
20060     if (LocVT.isInteger() &&
20061         (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
20062       return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
20063     if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
20064       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
20065     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
20066       return convertToScalableVector(LocVT, Val, DAG, Subtarget);
20067     llvm_unreachable("Unexpected Custom handling.");
20068   }
20069 
20070   switch (VA.getLocInfo()) {
20071   default:
20072     llvm_unreachable("Unexpected CCValAssign::LocInfo");
20073   case CCValAssign::Full:
20074     break;
20075   case CCValAssign::BCvt:
20076     Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
20077     break;
20078   }
20079   return Val;
20080 }
20081 
20082 // The caller is responsible for loading the full value if the argument is
20083 // passed with CCValAssign::Indirect.
20084 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
20085                                 const CCValAssign &VA, const SDLoc &DL) {
20086   MachineFunction &MF = DAG.getMachineFunction();
20087   MachineFrameInfo &MFI = MF.getFrameInfo();
20088   EVT LocVT = VA.getLocVT();
20089   EVT ValVT = VA.getValVT();
20090   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
20091   if (VA.getLocInfo() == CCValAssign::Indirect) {
20092     // When the value is a scalable vector, the stack slot holds a pointer to
20093     // the vector rather than the vector itself, so ValVT becomes the pointer
20094     // type instead of the scalable vector type.
20095     ValVT = LocVT;
20096   }
20097   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
20098                                  /*IsImmutable=*/true);
20099   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20100   SDValue Val;
20101 
20102   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
20103   switch (VA.getLocInfo()) {
20104   default:
20105     llvm_unreachable("Unexpected CCValAssign::LocInfo");
20106   case CCValAssign::Full:
20107   case CCValAssign::Indirect:
20108   case CCValAssign::BCvt:
20109     break;
20110   }
20111   Val = DAG.getExtLoad(
20112       ExtType, DL, LocVT, Chain, FIN,
20113       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
20114   return Val;
20115 }
20116 
20117 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
20118                                        const CCValAssign &VA,
20119                                        const CCValAssign &HiVA,
20120                                        const SDLoc &DL) {
20121   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
20122          "Unexpected VA");
20123   MachineFunction &MF = DAG.getMachineFunction();
20124   MachineFrameInfo &MFI = MF.getFrameInfo();
20125   MachineRegisterInfo &RegInfo = MF.getRegInfo();
20126 
20127   assert(VA.isRegLoc() && "Expected register VA assignment");
20128 
20129   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20130   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
20131   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
20132   SDValue Hi;
20133   if (HiVA.isMemLoc()) {
20134     // Second half of f64 is passed on the stack.
20135     int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
20136                                    /*IsImmutable=*/true);
20137     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
20138     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
20139                      MachinePointerInfo::getFixedStack(MF, FI));
20140   } else {
20141     // Second half of f64 is passed in another GPR.
20142     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20143     RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
20144     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
20145   }
20146   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
20147 }
20148 
20149 // Transform physical registers into virtual registers.
20150 SDValue RISCVTargetLowering::LowerFormalArguments(
20151     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
20152     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
20153     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
20154 
20155   MachineFunction &MF = DAG.getMachineFunction();
20156 
20157   switch (CallConv) {
20158   default:
20159     report_fatal_error("Unsupported calling convention");
20160   case CallingConv::C:
20161   case CallingConv::Fast:
20162   case CallingConv::SPIR_KERNEL:
20163   case CallingConv::GRAAL:
20164   case CallingConv::RISCV_VectorCall:
20165     break;
20166   case CallingConv::GHC:
20167     if (Subtarget.hasStdExtE())
20168       report_fatal_error("GHC calling convention is not supported on RVE!");
20169     if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
20170       report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
20171                          "(Zdinx/D) instruction set extensions");
20172   }
20173 
20174   const Function &Func = MF.getFunction();
20175   if (Func.hasFnAttribute("interrupt")) {
20176     if (!Func.arg_empty())
20177       report_fatal_error(
20178         "Functions with the interrupt attribute cannot have arguments!");
20179 
20180     StringRef Kind =
20181       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20182 
20183     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
20184       report_fatal_error(
20185         "Function interrupt attribute argument not supported!");
20186   }
20187 
20188   EVT PtrVT = getPointerTy(DAG.getDataLayout());
20189   MVT XLenVT = Subtarget.getXLenVT();
20190   unsigned XLenInBytes = Subtarget.getXLen() / 8;
20191   // Used with varargs to accumulate store chains.
20192   std::vector<SDValue> OutChains;
20193 
20194   // Assign locations to all of the incoming arguments.
20195   SmallVector<CCValAssign, 16> ArgLocs;
20196   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20197 
20198   if (CallConv == CallingConv::GHC)
20199     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
20200   else
20201     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
20202                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20203                                                    : CC_RISCV);
20204 
20205   for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20206     CCValAssign &VA = ArgLocs[i];
20207     SDValue ArgValue;
20208     // Passing f64 on RV32D with a soft float ABI must be handled as a special
20209     // case.
20210     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20211       assert(VA.needsCustom());
20212       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
20213     } else if (VA.isRegLoc())
20214       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
20215     else
20216       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
20217 
20218     if (VA.getLocInfo() == CCValAssign::Indirect) {
20219       // If the original argument was split and passed by reference (e.g. i128
20220       // on RV32), we need to load all parts of it here (using the same
20221       // address). Vectors may be partly split to registers and partly to the
20222       // stack, in which case the base address is partly offset and subsequent
20223       // loads are relative to that.
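      // For example, an i128 argument on RV32 arrives as a single pointer; its
      // four i32 parts are then loaded from offsets 0, 4, 8 and 12 relative to
      // that pointer.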
20224       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
20225                                    MachinePointerInfo()));
20226       unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20227       unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20228       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20229       while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20230         CCValAssign &PartVA = ArgLocs[i + 1];
20231         unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20232         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20233         if (PartVA.getValVT().isScalableVector())
20234           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20235         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
20236         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
20237                                      MachinePointerInfo()));
20238         ++i;
20239         ++InsIdx;
20240       }
20241       continue;
20242     }
20243     InVals.push_back(ArgValue);
20244   }
20245 
20246   if (any_of(ArgLocs,
20247              [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20248     MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20249 
20250   if (IsVarArg) {
20251     ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
20252     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
20253     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
20254     MachineFrameInfo &MFI = MF.getFrameInfo();
20255     MachineRegisterInfo &RegInfo = MF.getRegInfo();
20256     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
20257 
20258     // Size of the vararg save area. For now, the varargs save area is either
20259     // zero or large enough to hold a0-a7.
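    // For the ILP32E/LP64E ABIs the argument registers are a0-a5, so the save
    // area is correspondingly smaller.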
20260     int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20261     int FI;
20262 
20263     // If all registers are allocated, then all varargs must be passed on the
20264     // If all registers are allocated, then all varargs must be passed on the
20265     // stack and we don't need to save any argument registers.
20266       int VaArgOffset = CCInfo.getStackSize();
20267       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20268     } else {
20269       int VaArgOffset = -VarArgsSaveSize;
20270       FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20271 
20272       // If saving an odd number of registers, create an extra stack slot to
20273       // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20274       // that offsets to even-numbered registers remain 2*XLEN-aligned.
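      // For example, on RV64 with three fixed GPR arguments (Idx == 3), a3-a7
      // are saved (5 * 8 == 40 bytes); the extra slot created below pads the
      // save area to 48 bytes, a multiple of 2*XLEN.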
20275       if (Idx % 2) {
20276         MFI.CreateFixedObject(
20277             XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20278         VarArgsSaveSize += XLenInBytes;
20279       }
20280 
20281       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20282 
20283       // Copy the integer registers that may have been used for passing varargs
20284       // to the vararg save area.
20285       for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20286         const Register Reg = RegInfo.createVirtualRegister(RC);
20287         RegInfo.addLiveIn(ArgRegs[I], Reg);
20288         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20289         SDValue Store = DAG.getStore(
20290             Chain, DL, ArgValue, FIN,
20291             MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20292         OutChains.push_back(Store);
20293         FIN =
20294             DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20295       }
20296     }
20297 
20298     // Record the frame index of the first variable argument,
20299     // which is needed by the VASTART lowering.
20300     RVFI->setVarArgsFrameIndex(FI);
20301     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20302   }
20303 
20304   // All stores are grouped in one node so that the sizes of Ins and InVals
20305   // still match. This only happens for vararg functions.
20306   if (!OutChains.empty()) {
20307     OutChains.push_back(Chain);
20308     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20309   }
20310 
20311   return Chain;
20312 }
20313 
20314 /// isEligibleForTailCallOptimization - Check whether the call is eligible
20315 /// for tail call optimization.
20316 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20317 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20318     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20319     const SmallVector<CCValAssign, 16> &ArgLocs) const {
20320 
20321   auto CalleeCC = CLI.CallConv;
20322   auto &Outs = CLI.Outs;
20323   auto &Caller = MF.getFunction();
20324   auto CallerCC = Caller.getCallingConv();
20325 
20326   // Exception-handling functions need a special set of instructions to
20327   // indicate a return to the hardware. Tail-calling another function would
20328   // probably break this.
20329   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20330   // should be expanded as new function attributes are introduced.
20331   if (Caller.hasFnAttribute("interrupt"))
20332     return false;
20333 
20334   // Do not tail call opt if the stack is used to pass parameters.
20335   if (CCInfo.getStackSize() != 0)
20336     return false;
20337 
20338   // Do not tail call opt if any parameters need to be passed indirectly.
20339   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20340   // passed indirectly. The address of the value is passed in a register, or,
20341   // if no register is available, on the stack. Passing indirectly often
20342   // requires allocating stack space in the caller to store the value, so the
20343   // CCInfo.getStackSize() != 0 check above is not enough and we also need to
20344   // check whether any of the CCValAssigns in ArgLocs use
20345   // CCValAssign::Indirect.
20346   for (auto &VA : ArgLocs)
20347     if (VA.getLocInfo() == CCValAssign::Indirect)
20348       return false;
20349 
20350   // Do not tail call opt if either caller or callee uses struct return
20351   // semantics.
20352   auto IsCallerStructRet = Caller.hasStructRetAttr();
20353   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20354   if (IsCallerStructRet || IsCalleeStructRet)
20355     return false;
20356 
20357   // The callee has to preserve all registers the caller needs to preserve.
20358   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20359   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20360   if (CalleeCC != CallerCC) {
20361     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20362     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20363       return false;
20364   }
20365 
20366   // Byval parameters hand the function a pointer directly into the stack area
20367   // we want to reuse during a tail call. Working around this *is* possible
20368   // but less efficient and uglier in LowerCall.
20369   for (auto &Arg : Outs)
20370     if (Arg.Flags.isByVal())
20371       return false;
20372 
20373   return true;
20374 }
20375 
20376 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20377   return DAG.getDataLayout().getPrefTypeAlign(
20378       VT.getTypeForEVT(*DAG.getContext()));
20379 }
20380 
20381 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20382 // and output parameter nodes.
20383 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20384                                        SmallVectorImpl<SDValue> &InVals) const {
20385   SelectionDAG &DAG = CLI.DAG;
20386   SDLoc &DL = CLI.DL;
20387   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
20388   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20389   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
20390   SDValue Chain = CLI.Chain;
20391   SDValue Callee = CLI.Callee;
20392   bool &IsTailCall = CLI.IsTailCall;
20393   CallingConv::ID CallConv = CLI.CallConv;
20394   bool IsVarArg = CLI.IsVarArg;
20395   EVT PtrVT = getPointerTy(DAG.getDataLayout());
20396   MVT XLenVT = Subtarget.getXLenVT();
20397 
20398   MachineFunction &MF = DAG.getMachineFunction();
20399 
20400   // Analyze the operands of the call, assigning locations to each operand.
20401   SmallVector<CCValAssign, 16> ArgLocs;
20402   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20403 
20404   if (CallConv == CallingConv::GHC) {
20405     if (Subtarget.hasStdExtE())
20406       report_fatal_error("GHC calling convention is not supported on RVE!");
20407     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20408   } else
20409     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20410                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20411                                                     : CC_RISCV);
20412 
20413   // Check if it's really possible to do a tail call.
20414   if (IsTailCall)
20415     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20416 
20417   if (IsTailCall)
20418     ++NumTailCalls;
20419   else if (CLI.CB && CLI.CB->isMustTailCall())
20420     report_fatal_error("failed to perform tail call elimination on a call "
20421                        "site marked musttail");
20422 
20423   // Get a count of how many bytes are to be pushed on the stack.
20424   unsigned NumBytes = ArgCCInfo.getStackSize();
20425 
20426   // Create local copies for byval args
20427   SmallVector<SDValue, 8> ByValArgs;
20428   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20429     ISD::ArgFlagsTy Flags = Outs[i].Flags;
20430     if (!Flags.isByVal())
20431       continue;
20432 
20433     SDValue Arg = OutVals[i];
20434     unsigned Size = Flags.getByValSize();
20435     Align Alignment = Flags.getNonZeroByValAlign();
20436 
20437     int FI =
20438         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20439     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20440     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20441 
20442     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20443                           /*IsVolatile=*/false,
20444                           /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20445                           MachinePointerInfo(), MachinePointerInfo());
20446     ByValArgs.push_back(FIPtr);
20447   }
20448 
20449   if (!IsTailCall)
20450     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20451 
20452   // Copy argument values to their designated locations.
20453   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
20454   SmallVector<SDValue, 8> MemOpChains;
20455   SDValue StackPtr;
20456   for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20457        ++i, ++OutIdx) {
20458     CCValAssign &VA = ArgLocs[i];
20459     SDValue ArgValue = OutVals[OutIdx];
20460     ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20461 
20462     // Handle passing f64 on RV32D with a soft float ABI as a special case.
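    // The f64 is split into two i32 halves with SplitF64; the low half always
    // goes in the assigned GPR, while the high half goes either in another GPR
    // or on the stack, depending on the Hi part's CCValAssign.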
20463     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20464       assert(VA.isRegLoc() && "Expected register VA assignment");
20465       assert(VA.needsCustom());
20466       SDValue SplitF64 = DAG.getNode(
20467           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20468       SDValue Lo = SplitF64.getValue(0);
20469       SDValue Hi = SplitF64.getValue(1);
20470 
20471       Register RegLo = VA.getLocReg();
20472       RegsToPass.push_back(std::make_pair(RegLo, Lo));
20473 
20474       // Get the CCValAssign for the Hi part.
20475       CCValAssign &HiVA = ArgLocs[++i];
20476 
20477       if (HiVA.isMemLoc()) {
20478         // Second half of f64 is passed on the stack.
20479         if (!StackPtr.getNode())
20480           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20481         SDValue Address =
20482             DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20483                         DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20484         // Emit the store.
20485         MemOpChains.push_back(DAG.getStore(
20486             Chain, DL, Hi, Address,
20487             MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
20488       } else {
20489         // Second half of f64 is passed in another GPR.
20490         Register RegHigh = HiVA.getLocReg();
20491         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20492       }
20493       continue;
20494     }
20495 
20496     // Promote the value if needed.
20497     // For now, only handle fully promoted and indirect arguments.
20498     if (VA.getLocInfo() == CCValAssign::Indirect) {
20499       // Store the argument in a stack slot and pass its address.
20500       Align StackAlign =
20501           std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20502                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
20503       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20504       // If the original argument was split (e.g. i128), we need
20505       // to store the required parts of it here (and pass just one address).
20506       // Vectors may be partly split to registers and partly to the stack, in
20507       // which case the base address is partly offset and subsequent stores are
20508       // relative to that.
20509       unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20510       unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20511       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20512       // Calculate the total size to store. We don't know everything we are
20513       // going to store up front, so walk the remaining parts and collect the
20514       // size and alignment info as we go.
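      // For example, an i128 argument on RV32 is stored as four i32 parts at
      // offsets 0, 4, 8 and 12 of the spill slot, and only the slot address is
      // passed to the callee.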
20515       SmallVector<std::pair<SDValue, SDValue>> Parts;
20516       while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20517         SDValue PartValue = OutVals[OutIdx + 1];
20518         unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20519         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20520         EVT PartVT = PartValue.getValueType();
20521         if (PartVT.isScalableVector())
20522           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20523         StoredSize += PartVT.getStoreSize();
20524         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20525         Parts.push_back(std::make_pair(PartValue, Offset));
20526         ++i;
20527         ++OutIdx;
20528       }
20529       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20530       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20531       MemOpChains.push_back(
20532           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20533                        MachinePointerInfo::getFixedStack(MF, FI)));
20534       for (const auto &Part : Parts) {
20535         SDValue PartValue = Part.first;
20536         SDValue PartOffset = Part.second;
20537         SDValue Address =
20538             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20539         MemOpChains.push_back(
20540             DAG.getStore(Chain, DL, PartValue, Address,
20541                          MachinePointerInfo::getFixedStack(MF, FI)));
20542       }
20543       ArgValue = SpillSlot;
20544     } else {
20545       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20546     }
20547 
20548     // Use local copy if it is a byval arg.
20549     if (Flags.isByVal())
20550       ArgValue = ByValArgs[j++];
20551 
20552     if (VA.isRegLoc()) {
20553       // Queue up the argument copies and emit them at the end.
20554       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20555     } else {
20556       assert(VA.isMemLoc() && "Argument not register or memory");
20557       assert(!IsTailCall && "Tail call not allowed if stack is used "
20558                             "for passing parameters");
20559 
20560       // Work out the address of the stack slot.
20561       if (!StackPtr.getNode())
20562         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20563       SDValue Address =
20564           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20565                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
20566 
20567       // Emit the store.
20568       MemOpChains.push_back(
20569           DAG.getStore(Chain, DL, ArgValue, Address,
20570                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
20571     }
20572   }
20573 
20574   // Join the stores, which are independent of one another.
20575   if (!MemOpChains.empty())
20576     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20577 
20578   SDValue Glue;
20579 
20580   // Build a sequence of copy-to-reg nodes, chained and glued together.
20581   for (auto &Reg : RegsToPass) {
20582     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20583     Glue = Chain.getValue(1);
20584   }
20585 
20586   // Validate that none of the argument registers have been marked as
20587   // reserved; if so, report an error. Do the same for the return address if
20588   // this is not a tail call.
20589   validateCCReservedRegs(RegsToPass, MF);
20590   if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20591     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20592         MF.getFunction(),
20593         "Return address register required, but has been reserved."});
20594 
20595   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20596   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20597   // split it, and the direct call can then be matched by PseudoCALL.
20598   bool CalleeIsLargeExternalSymbol = false;
20599   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
20600     if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20601       Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20602     else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20603       Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20604       CalleeIsLargeExternalSymbol = true;
20605     }
20606   } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20607     const GlobalValue *GV = S->getGlobal();
20608     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20609   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20610     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20611   }
20612 
20613   // The first call operand is the chain and the second is the target address.
20614   SmallVector<SDValue, 8> Ops;
20615   Ops.push_back(Chain);
20616   Ops.push_back(Callee);
20617 
20618   // Add argument registers to the end of the list so that they are
20619   // known live into the call.
20620   for (auto &Reg : RegsToPass)
20621     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20622 
20623   // Add a register mask operand representing the call-preserved registers.
20624   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20625   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20626   assert(Mask && "Missing call preserved mask for calling convention");
20627   Ops.push_back(DAG.getRegisterMask(Mask));
20628 
20629   // Glue the call to the argument copies, if any.
20630   if (Glue.getNode())
20631     Ops.push_back(Glue);
20632 
20633   assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20634          "Unexpected CFI type for a direct call");
20635 
20636   // Emit the call.
20637   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20638 
20639   // Use a software-guarded branch for non-indirect calls under the large code
20640   // model. A tail call to an external symbol will have a null CLI.CB, so we
20641   // need another way to determine the call-site type.
20642   bool NeedSWGuarded = false;
20643   if (getTargetMachine().getCodeModel() == CodeModel::Large &&
20644       Subtarget.hasStdExtZicfilp() &&
20645       ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20646     NeedSWGuarded = true;
20647 
20648   if (IsTailCall) {
20649     MF.getFrameInfo().setHasTailCall();
20650     unsigned CallOpc =
20651         NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20652     SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20653     if (CLI.CFIType)
20654       Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20655     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20656     return Ret;
20657   }
20658 
20659   unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20660   Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20661   if (CLI.CFIType)
20662     Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20663   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20664   Glue = Chain.getValue(1);
20665 
20666   // Mark the end of the call, which is glued to the call itself.
20667   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20668   Glue = Chain.getValue(1);
20669 
20670   // Assign locations to each value returned by this call.
20671   SmallVector<CCValAssign, 16> RVLocs;
20672   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20673   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20674 
20675   // Copy all of the result registers out of their specified physreg.
20676   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20677     auto &VA = RVLocs[i];
20678     // Copy the value out
20679     SDValue RetValue =
20680         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20681     // Glue the RetValue to the end of the call sequence
20682     Chain = RetValue.getValue(1);
20683     Glue = RetValue.getValue(2);
20684 
20685     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20686       assert(VA.needsCustom());
20687       SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20688                                              MVT::i32, Glue);
20689       Chain = RetValue2.getValue(1);
20690       Glue = RetValue2.getValue(2);
20691       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20692                              RetValue2);
20693     } else
20694       RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20695 
20696     InVals.push_back(RetValue);
20697   }
20698 
20699   return Chain;
20700 }
20701 
20702 bool RISCVTargetLowering::CanLowerReturn(
20703     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20704     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
20705     const Type *RetTy) const {
20706   SmallVector<CCValAssign, 16> RVLocs;
20707   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20708 
20709   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20710     MVT VT = Outs[i].VT;
20711     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20712     if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20713                  /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20714       return false;
20715   }
20716   return true;
20717 }
20718 
20719 SDValue
20720 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20721                                  bool IsVarArg,
20722                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
20723                                  const SmallVectorImpl<SDValue> &OutVals,
20724                                  const SDLoc &DL, SelectionDAG &DAG) const {
20725   MachineFunction &MF = DAG.getMachineFunction();
20726   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20727 
20728   // Stores the assignment of the return value to a location.
20729   SmallVector<CCValAssign, 16> RVLocs;
20730 
20731   // Info about the registers and stack slot.
20732   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20733                  *DAG.getContext());
20734 
20735   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20736                     nullptr, CC_RISCV);
20737 
20738   if (CallConv == CallingConv::GHC && !RVLocs.empty())
20739     report_fatal_error("GHC functions return void only");
20740 
20741   SDValue Glue;
20742   SmallVector<SDValue, 4> RetOps(1, Chain);
20743 
20744   // Copy the result values into the output registers.
20745   for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20746     SDValue Val = OutVals[OutIdx];
20747     CCValAssign &VA = RVLocs[i];
20748     assert(VA.isRegLoc() && "Can only return in registers!");
20749 
20750     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20751       // Handle returning f64 on RV32D with a soft float ABI.
20752       assert(VA.isRegLoc() && "Expected return via registers");
20753       assert(VA.needsCustom());
20754       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20755                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
20756       SDValue Lo = SplitF64.getValue(0);
20757       SDValue Hi = SplitF64.getValue(1);
20758       Register RegLo = VA.getLocReg();
20759       Register RegHi = RVLocs[++i].getLocReg();
20760 
20761       if (STI.isRegisterReservedByUser(RegLo) ||
20762           STI.isRegisterReservedByUser(RegHi))
20763         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20764             MF.getFunction(),
20765             "Return value register required, but has been reserved."});
20766 
20767       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20768       Glue = Chain.getValue(1);
20769       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20770       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20771       Glue = Chain.getValue(1);
20772       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20773     } else {
20774       // Handle a 'normal' return.
20775       Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20776       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20777 
20778       if (STI.isRegisterReservedByUser(VA.getLocReg()))
20779         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20780             MF.getFunction(),
20781             "Return value register required, but has been reserved."});
20782 
20783       // Guarantee that all emitted copies are stuck together.
20784       Glue = Chain.getValue(1);
20785       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20786     }
20787   }
20788 
20789   RetOps[0] = Chain; // Update chain.
20790 
20791   // Add the glue node if we have it.
20792   if (Glue.getNode()) {
20793     RetOps.push_back(Glue);
20794   }
20795 
20796   if (any_of(RVLocs,
20797              [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20798     MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20799 
20800   unsigned RetOpc = RISCVISD::RET_GLUE;
20801   // Interrupt service routines use different return instructions.
20802   const Function &Func = DAG.getMachineFunction().getFunction();
20803   if (Func.hasFnAttribute("interrupt")) {
20804     if (!Func.getReturnType()->isVoidTy())
20805       report_fatal_error(
20806           "Functions with the interrupt attribute must have void return type!");
20807 
20808     MachineFunction &MF = DAG.getMachineFunction();
20809     StringRef Kind =
20810       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20811 
20812     if (Kind == "supervisor")
20813       RetOpc = RISCVISD::SRET_GLUE;
20814     else
20815       RetOpc = RISCVISD::MRET_GLUE;
20816   }
20817 
20818   return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20819 }
20820 
20821 void RISCVTargetLowering::validateCCReservedRegs(
20822     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20823     MachineFunction &MF) const {
20824   const Function &F = MF.getFunction();
20825   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20826 
20827   if (llvm::any_of(Regs, [&STI](auto Reg) {
20828         return STI.isRegisterReservedByUser(Reg.first);
20829       }))
20830     F.getContext().diagnose(DiagnosticInfoUnsupported{
20831         F, "Argument register required, but has been reserved."});
20832 }
20833 
20834 // Check if the result of the node is only used as a return value, as
20835 // otherwise we can't perform a tail-call.
20836 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20837   if (N->getNumValues() != 1)
20838     return false;
20839   if (!N->hasNUsesOfValue(1, 0))
20840     return false;
20841 
20842   SDNode *Copy = *N->user_begin();
20843 
20844   if (Copy->getOpcode() == ISD::BITCAST) {
20845     return isUsedByReturnOnly(Copy, Chain);
20846   }
20847 
20848   // TODO: Handle additional opcodes in order to support tail-calling libcalls
20849   // with soft float ABIs.
20850   if (Copy->getOpcode() != ISD::CopyToReg) {
20851     return false;
20852   }
20853 
20854   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20855   // isn't safe to perform a tail call.
20856   if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20857     return false;
20858 
20859   // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20860   bool HasRet = false;
20861   for (SDNode *Node : Copy->users()) {
20862     if (Node->getOpcode() != RISCVISD::RET_GLUE)
20863       return false;
20864     HasRet = true;
20865   }
20866   if (!HasRet)
20867     return false;
20868 
20869   Chain = Copy->getOperand(0);
20870   return true;
20871 }
20872 
20873 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20874   return CI->isTailCall();
20875 }
20876 
20877 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20878 #define NODE_NAME_CASE(NODE)                                                   \
20879   case RISCVISD::NODE:                                                         \
20880     return "RISCVISD::" #NODE;
20881   // clang-format off
20882   switch ((RISCVISD::NodeType)Opcode) {
20883   case RISCVISD::FIRST_NUMBER:
20884     break;
20885   NODE_NAME_CASE(RET_GLUE)
20886   NODE_NAME_CASE(SRET_GLUE)
20887   NODE_NAME_CASE(MRET_GLUE)
20888   NODE_NAME_CASE(CALL)
20889   NODE_NAME_CASE(TAIL)
20890   NODE_NAME_CASE(SELECT_CC)
20891   NODE_NAME_CASE(BR_CC)
20892   NODE_NAME_CASE(BuildGPRPair)
20893   NODE_NAME_CASE(SplitGPRPair)
20894   NODE_NAME_CASE(BuildPairF64)
20895   NODE_NAME_CASE(SplitF64)
20896   NODE_NAME_CASE(ADD_LO)
20897   NODE_NAME_CASE(HI)
20898   NODE_NAME_CASE(LLA)
20899   NODE_NAME_CASE(ADD_TPREL)
20900   NODE_NAME_CASE(MULHSU)
20901   NODE_NAME_CASE(SHL_ADD)
20902   NODE_NAME_CASE(SLLW)
20903   NODE_NAME_CASE(SRAW)
20904   NODE_NAME_CASE(SRLW)
20905   NODE_NAME_CASE(DIVW)
20906   NODE_NAME_CASE(DIVUW)
20907   NODE_NAME_CASE(REMUW)
20908   NODE_NAME_CASE(ROLW)
20909   NODE_NAME_CASE(RORW)
20910   NODE_NAME_CASE(CLZW)
20911   NODE_NAME_CASE(CTZW)
20912   NODE_NAME_CASE(ABSW)
20913   NODE_NAME_CASE(FMV_H_X)
20914   NODE_NAME_CASE(FMV_X_ANYEXTH)
20915   NODE_NAME_CASE(FMV_X_SIGNEXTH)
20916   NODE_NAME_CASE(FMV_W_X_RV64)
20917   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20918   NODE_NAME_CASE(FCVT_X)
20919   NODE_NAME_CASE(FCVT_XU)
20920   NODE_NAME_CASE(FCVT_W_RV64)
20921   NODE_NAME_CASE(FCVT_WU_RV64)
20922   NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20923   NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20924   NODE_NAME_CASE(FROUND)
20925   NODE_NAME_CASE(FCLASS)
20926   NODE_NAME_CASE(FSGNJX)
20927   NODE_NAME_CASE(FMAX)
20928   NODE_NAME_CASE(FMIN)
20929   NODE_NAME_CASE(FLI)
20930   NODE_NAME_CASE(READ_COUNTER_WIDE)
20931   NODE_NAME_CASE(BREV8)
20932   NODE_NAME_CASE(ORC_B)
20933   NODE_NAME_CASE(ZIP)
20934   NODE_NAME_CASE(UNZIP)
20935   NODE_NAME_CASE(CLMUL)
20936   NODE_NAME_CASE(CLMULH)
20937   NODE_NAME_CASE(CLMULR)
20938   NODE_NAME_CASE(MOPR)
20939   NODE_NAME_CASE(MOPRR)
20940   NODE_NAME_CASE(SHA256SIG0)
20941   NODE_NAME_CASE(SHA256SIG1)
20942   NODE_NAME_CASE(SHA256SUM0)
20943   NODE_NAME_CASE(SHA256SUM1)
20944   NODE_NAME_CASE(SM4KS)
20945   NODE_NAME_CASE(SM4ED)
20946   NODE_NAME_CASE(SM3P0)
20947   NODE_NAME_CASE(SM3P1)
20948   NODE_NAME_CASE(TH_LWD)
20949   NODE_NAME_CASE(TH_LWUD)
20950   NODE_NAME_CASE(TH_LDD)
20951   NODE_NAME_CASE(TH_SWD)
20952   NODE_NAME_CASE(TH_SDD)
20953   NODE_NAME_CASE(VMV_V_V_VL)
20954   NODE_NAME_CASE(VMV_V_X_VL)
20955   NODE_NAME_CASE(VFMV_V_F_VL)
20956   NODE_NAME_CASE(VMV_X_S)
20957   NODE_NAME_CASE(VMV_S_X_VL)
20958   NODE_NAME_CASE(VFMV_S_F_VL)
20959   NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20960   NODE_NAME_CASE(READ_VLENB)
20961   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20962   NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20963   NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20964   NODE_NAME_CASE(VSLIDEUP_VL)
20965   NODE_NAME_CASE(VSLIDE1UP_VL)
20966   NODE_NAME_CASE(VSLIDEDOWN_VL)
20967   NODE_NAME_CASE(VSLIDE1DOWN_VL)
20968   NODE_NAME_CASE(VFSLIDE1UP_VL)
20969   NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20970   NODE_NAME_CASE(VID_VL)
20971   NODE_NAME_CASE(VFNCVT_ROD_VL)
20972   NODE_NAME_CASE(VECREDUCE_ADD_VL)
20973   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20974   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20975   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20976   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20977   NODE_NAME_CASE(VECREDUCE_AND_VL)
20978   NODE_NAME_CASE(VECREDUCE_OR_VL)
20979   NODE_NAME_CASE(VECREDUCE_XOR_VL)
20980   NODE_NAME_CASE(VECREDUCE_FADD_VL)
20981   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20982   NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20983   NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20984   NODE_NAME_CASE(ADD_VL)
20985   NODE_NAME_CASE(AND_VL)
20986   NODE_NAME_CASE(MUL_VL)
20987   NODE_NAME_CASE(OR_VL)
20988   NODE_NAME_CASE(SDIV_VL)
20989   NODE_NAME_CASE(SHL_VL)
20990   NODE_NAME_CASE(SREM_VL)
20991   NODE_NAME_CASE(SRA_VL)
20992   NODE_NAME_CASE(SRL_VL)
20993   NODE_NAME_CASE(ROTL_VL)
20994   NODE_NAME_CASE(ROTR_VL)
20995   NODE_NAME_CASE(SUB_VL)
20996   NODE_NAME_CASE(UDIV_VL)
20997   NODE_NAME_CASE(UREM_VL)
20998   NODE_NAME_CASE(XOR_VL)
20999   NODE_NAME_CASE(AVGFLOORS_VL)
21000   NODE_NAME_CASE(AVGFLOORU_VL)
21001   NODE_NAME_CASE(AVGCEILS_VL)
21002   NODE_NAME_CASE(AVGCEILU_VL)
21003   NODE_NAME_CASE(SADDSAT_VL)
21004   NODE_NAME_CASE(UADDSAT_VL)
21005   NODE_NAME_CASE(SSUBSAT_VL)
21006   NODE_NAME_CASE(USUBSAT_VL)
21007   NODE_NAME_CASE(FADD_VL)
21008   NODE_NAME_CASE(FSUB_VL)
21009   NODE_NAME_CASE(FMUL_VL)
21010   NODE_NAME_CASE(FDIV_VL)
21011   NODE_NAME_CASE(FNEG_VL)
21012   NODE_NAME_CASE(FABS_VL)
21013   NODE_NAME_CASE(FSQRT_VL)
21014   NODE_NAME_CASE(FCLASS_VL)
21015   NODE_NAME_CASE(VFMADD_VL)
21016   NODE_NAME_CASE(VFNMADD_VL)
21017   NODE_NAME_CASE(VFMSUB_VL)
21018   NODE_NAME_CASE(VFNMSUB_VL)
21019   NODE_NAME_CASE(VFWMADD_VL)
21020   NODE_NAME_CASE(VFWNMADD_VL)
21021   NODE_NAME_CASE(VFWMSUB_VL)
21022   NODE_NAME_CASE(VFWNMSUB_VL)
21023   NODE_NAME_CASE(FCOPYSIGN_VL)
21024   NODE_NAME_CASE(SMIN_VL)
21025   NODE_NAME_CASE(SMAX_VL)
21026   NODE_NAME_CASE(UMIN_VL)
21027   NODE_NAME_CASE(UMAX_VL)
21028   NODE_NAME_CASE(BITREVERSE_VL)
21029   NODE_NAME_CASE(BSWAP_VL)
21030   NODE_NAME_CASE(CTLZ_VL)
21031   NODE_NAME_CASE(CTTZ_VL)
21032   NODE_NAME_CASE(CTPOP_VL)
21033   NODE_NAME_CASE(VFMIN_VL)
21034   NODE_NAME_CASE(VFMAX_VL)
21035   NODE_NAME_CASE(MULHS_VL)
21036   NODE_NAME_CASE(MULHU_VL)
21037   NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
21038   NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
21039   NODE_NAME_CASE(VFCVT_RM_X_F_VL)
21040   NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
21041   NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
21042   NODE_NAME_CASE(SINT_TO_FP_VL)
21043   NODE_NAME_CASE(UINT_TO_FP_VL)
21044   NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
21045   NODE_NAME_CASE(VFCVT_RM_F_X_VL)
21046   NODE_NAME_CASE(FP_EXTEND_VL)
21047   NODE_NAME_CASE(FP_ROUND_VL)
21048   NODE_NAME_CASE(STRICT_FADD_VL)
21049   NODE_NAME_CASE(STRICT_FSUB_VL)
21050   NODE_NAME_CASE(STRICT_FMUL_VL)
21051   NODE_NAME_CASE(STRICT_FDIV_VL)
21052   NODE_NAME_CASE(STRICT_FSQRT_VL)
21053   NODE_NAME_CASE(STRICT_VFMADD_VL)
21054   NODE_NAME_CASE(STRICT_VFNMADD_VL)
21055   NODE_NAME_CASE(STRICT_VFMSUB_VL)
21056   NODE_NAME_CASE(STRICT_VFNMSUB_VL)
21057   NODE_NAME_CASE(STRICT_FP_ROUND_VL)
21058   NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
21059   NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
21060   NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
21061   NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
21062   NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
21063   NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
21064   NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
21065   NODE_NAME_CASE(STRICT_FSETCC_VL)
21066   NODE_NAME_CASE(STRICT_FSETCCS_VL)
21067   NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
21068   NODE_NAME_CASE(VWMUL_VL)
21069   NODE_NAME_CASE(VWMULU_VL)
21070   NODE_NAME_CASE(VWMULSU_VL)
21071   NODE_NAME_CASE(VWADD_VL)
21072   NODE_NAME_CASE(VWADDU_VL)
21073   NODE_NAME_CASE(VWSUB_VL)
21074   NODE_NAME_CASE(VWSUBU_VL)
21075   NODE_NAME_CASE(VWADD_W_VL)
21076   NODE_NAME_CASE(VWADDU_W_VL)
21077   NODE_NAME_CASE(VWSUB_W_VL)
21078   NODE_NAME_CASE(VWSUBU_W_VL)
21079   NODE_NAME_CASE(VWSLL_VL)
21080   NODE_NAME_CASE(VFWMUL_VL)
21081   NODE_NAME_CASE(VFWADD_VL)
21082   NODE_NAME_CASE(VFWSUB_VL)
21083   NODE_NAME_CASE(VFWADD_W_VL)
21084   NODE_NAME_CASE(VFWSUB_W_VL)
21085   NODE_NAME_CASE(VWMACC_VL)
21086   NODE_NAME_CASE(VWMACCU_VL)
21087   NODE_NAME_CASE(VWMACCSU_VL)
21088   NODE_NAME_CASE(SETCC_VL)
21089   NODE_NAME_CASE(VMERGE_VL)
21090   NODE_NAME_CASE(VMAND_VL)
21091   NODE_NAME_CASE(VMOR_VL)
21092   NODE_NAME_CASE(VMXOR_VL)
21093   NODE_NAME_CASE(VMCLR_VL)
21094   NODE_NAME_CASE(VMSET_VL)
21095   NODE_NAME_CASE(VRGATHER_VX_VL)
21096   NODE_NAME_CASE(VRGATHER_VV_VL)
21097   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
21098   NODE_NAME_CASE(VSEXT_VL)
21099   NODE_NAME_CASE(VZEXT_VL)
21100   NODE_NAME_CASE(VCPOP_VL)
21101   NODE_NAME_CASE(VFIRST_VL)
21102   NODE_NAME_CASE(READ_CSR)
21103   NODE_NAME_CASE(WRITE_CSR)
21104   NODE_NAME_CASE(SWAP_CSR)
21105   NODE_NAME_CASE(CZERO_EQZ)
21106   NODE_NAME_CASE(CZERO_NEZ)
21107   NODE_NAME_CASE(SW_GUARDED_BRIND)
21108   NODE_NAME_CASE(SW_GUARDED_CALL)
21109   NODE_NAME_CASE(SW_GUARDED_TAIL)
21110   NODE_NAME_CASE(TUPLE_INSERT)
21111   NODE_NAME_CASE(TUPLE_EXTRACT)
21112   NODE_NAME_CASE(SF_VC_XV_SE)
21113   NODE_NAME_CASE(SF_VC_IV_SE)
21114   NODE_NAME_CASE(SF_VC_VV_SE)
21115   NODE_NAME_CASE(SF_VC_FV_SE)
21116   NODE_NAME_CASE(SF_VC_XVV_SE)
21117   NODE_NAME_CASE(SF_VC_IVV_SE)
21118   NODE_NAME_CASE(SF_VC_VVV_SE)
21119   NODE_NAME_CASE(SF_VC_FVV_SE)
21120   NODE_NAME_CASE(SF_VC_XVW_SE)
21121   NODE_NAME_CASE(SF_VC_IVW_SE)
21122   NODE_NAME_CASE(SF_VC_VVW_SE)
21123   NODE_NAME_CASE(SF_VC_FVW_SE)
21124   NODE_NAME_CASE(SF_VC_V_X_SE)
21125   NODE_NAME_CASE(SF_VC_V_I_SE)
21126   NODE_NAME_CASE(SF_VC_V_XV_SE)
21127   NODE_NAME_CASE(SF_VC_V_IV_SE)
21128   NODE_NAME_CASE(SF_VC_V_VV_SE)
21129   NODE_NAME_CASE(SF_VC_V_FV_SE)
21130   NODE_NAME_CASE(SF_VC_V_XVV_SE)
21131   NODE_NAME_CASE(SF_VC_V_IVV_SE)
21132   NODE_NAME_CASE(SF_VC_V_VVV_SE)
21133   NODE_NAME_CASE(SF_VC_V_FVV_SE)
21134   NODE_NAME_CASE(SF_VC_V_XVW_SE)
21135   NODE_NAME_CASE(SF_VC_V_IVW_SE)
21136   NODE_NAME_CASE(SF_VC_V_VVW_SE)
21137   NODE_NAME_CASE(SF_VC_V_FVW_SE)
21138   NODE_NAME_CASE(PROBED_ALLOCA)
21139   }
21140   // clang-format on
21141   return nullptr;
21142 #undef NODE_NAME_CASE
21143 }
21144 
21145 /// getConstraintType - Given a constraint letter, return the type of
21146 /// constraint it is for this target.
21147 RISCVTargetLowering::ConstraintType
21148 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
21149   if (Constraint.size() == 1) {
21150     switch (Constraint[0]) {
21151     default:
21152       break;
21153     case 'f':
21154     case 'R':
21155       return C_RegisterClass;
21156     case 'I':
21157     case 'J':
21158     case 'K':
21159       return C_Immediate;
21160     case 'A':
21161       return C_Memory;
21162     case 's':
21163     case 'S': // A symbolic address
21164       return C_Other;
21165     }
21166   } else {
21167     if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
21168       return C_RegisterClass;
21169     if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
21170       return C_RegisterClass;
21171   }
21172   return TargetLowering::getConstraintType(Constraint);
21173 }
21174 
21175 std::pair<unsigned, const TargetRegisterClass *>
21176 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
21177                                                   StringRef Constraint,
21178                                                   MVT VT) const {
21179   // First, see if this is a constraint that directly corresponds to a RISC-V
21180   // register class.
21181   if (Constraint.size() == 1) {
21182     switch (Constraint[0]) {
21183     case 'r':
21184       // TODO: Support fixed vectors up to XLen for P extension?
21185       if (VT.isVector())
21186         break;
21187       if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21188         return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21189       if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21190         return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21191       if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21192         return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21193       return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21194     case 'f':
21195       if (VT == MVT::f16) {
21196         if (Subtarget.hasStdExtZfhmin())
21197           return std::make_pair(0U, &RISCV::FPR16RegClass);
21198         if (Subtarget.hasStdExtZhinxmin())
21199           return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21200       } else if (VT == MVT::f32) {
21201         if (Subtarget.hasStdExtF())
21202           return std::make_pair(0U, &RISCV::FPR32RegClass);
21203         if (Subtarget.hasStdExtZfinx())
21204           return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21205       } else if (VT == MVT::f64) {
21206         if (Subtarget.hasStdExtD())
21207           return std::make_pair(0U, &RISCV::FPR64RegClass);
21208         if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21209           return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21210         if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21211           return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21212       }
21213       break;
21214     case 'R':
21215       return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21216     default:
21217       break;
21218     }
21219   } else if (Constraint == "vr") {
21220     for (const auto *RC :
21221          {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
21222           &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
21223           &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
21224           &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
21225           &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
21226           &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
21227           &RISCV::VRN2M4RegClass}) {
21228       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21229         return std::make_pair(0U, RC);
21230     }
21231   } else if (Constraint == "vd") {
21232     for (const auto *RC :
21233          {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
21234           &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
21235           &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
21236           &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
21237           &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
21238           &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
21239           &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
21240           &RISCV::VRN2M4NoV0RegClass}) {
21241       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21242         return std::make_pair(0U, RC);
21243     }
21244   } else if (Constraint == "vm") {
21245     if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
21246       return std::make_pair(0U, &RISCV::VMV0RegClass);
21247   } else if (Constraint == "cr") {
21248     if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21249       return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21250     if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21251       return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21252     if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21253       return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21254     if (!VT.isVector())
21255       return std::make_pair(0U, &RISCV::GPRCRegClass);
21256   } else if (Constraint == "cR") {
21257     return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21258   } else if (Constraint == "cf") {
21259     if (VT == MVT::f16) {
21260       if (Subtarget.hasStdExtZfhmin())
21261         return std::make_pair(0U, &RISCV::FPR16CRegClass);
21262       if (Subtarget.hasStdExtZhinxmin())
21263         return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21264     } else if (VT == MVT::f32) {
21265       if (Subtarget.hasStdExtF())
21266         return std::make_pair(0U, &RISCV::FPR32CRegClass);
21267       if (Subtarget.hasStdExtZfinx())
21268         return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21269     } else if (VT == MVT::f64) {
21270       if (Subtarget.hasStdExtD())
21271         return std::make_pair(0U, &RISCV::FPR64CRegClass);
21272       if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21273         return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21274       if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21275         return std::make_pair(0U, &RISCV::GPRCRegClass);
21276     }
21277   }
21278 
21279   // Clang will correctly decode the usage of register name aliases into their
21280   // official names. However, other frontends like `rustc` do not. This allows
21281   // users of these frontends to use the ABI names for registers in LLVM-style
21282   // register constraints.
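  // For example, a rustc user can write the constraint "{ra}" and get
  // RISCV::X1, even though the register's TableGen record name is X1.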
21283   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
21284                                .Case("{zero}", RISCV::X0)
21285                                .Case("{ra}", RISCV::X1)
21286                                .Case("{sp}", RISCV::X2)
21287                                .Case("{gp}", RISCV::X3)
21288                                .Case("{tp}", RISCV::X4)
21289                                .Case("{t0}", RISCV::X5)
21290                                .Case("{t1}", RISCV::X6)
21291                                .Case("{t2}", RISCV::X7)
21292                                .Cases("{s0}", "{fp}", RISCV::X8)
21293                                .Case("{s1}", RISCV::X9)
21294                                .Case("{a0}", RISCV::X10)
21295                                .Case("{a1}", RISCV::X11)
21296                                .Case("{a2}", RISCV::X12)
21297                                .Case("{a3}", RISCV::X13)
21298                                .Case("{a4}", RISCV::X14)
21299                                .Case("{a5}", RISCV::X15)
21300                                .Case("{a6}", RISCV::X16)
21301                                .Case("{a7}", RISCV::X17)
21302                                .Case("{s2}", RISCV::X18)
21303                                .Case("{s3}", RISCV::X19)
21304                                .Case("{s4}", RISCV::X20)
21305                                .Case("{s5}", RISCV::X21)
21306                                .Case("{s6}", RISCV::X22)
21307                                .Case("{s7}", RISCV::X23)
21308                                .Case("{s8}", RISCV::X24)
21309                                .Case("{s9}", RISCV::X25)
21310                                .Case("{s10}", RISCV::X26)
21311                                .Case("{s11}", RISCV::X27)
21312                                .Case("{t3}", RISCV::X28)
21313                                .Case("{t4}", RISCV::X29)
21314                                .Case("{t5}", RISCV::X30)
21315                                .Case("{t6}", RISCV::X31)
21316                                .Default(RISCV::NoRegister);
21317   if (XRegFromAlias != RISCV::NoRegister)
21318     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21319 
21320   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21321   // TableGen record rather than the AsmName to choose registers for InlineAsm
21322   // constraints, plus we want to match those names to the widest floating point
21323   // register type available, manually select floating point registers here.
21324   //
21325   // The second case is the ABI name of the register, so that frontends can also
21326   // use the ABI names in register constraint lists.
21327   if (Subtarget.hasStdExtF()) {
21328     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
21329                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
21330                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
21331                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
21332                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
21333                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
21334                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
21335                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
21336                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
21337                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
21338                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
21339                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
21340                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
21341                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
21342                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
21343                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
21344                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
21345                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
21346                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
21347                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
21348                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
21349                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
21350                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
21351                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
21352                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
21353                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
21354                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
21355                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
21356                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
21357                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
21358                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
21359                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
21360                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
21361                         .Default(RISCV::NoRegister);
21362     if (FReg != RISCV::NoRegister) {
21363       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21364       if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21365         unsigned RegNo = FReg - RISCV::F0_F;
21366         unsigned DReg = RISCV::F0_D + RegNo;
21367         return std::make_pair(DReg, &RISCV::FPR64RegClass);
21368       }
21369       if (VT == MVT::f32 || VT == MVT::Other)
21370         return std::make_pair(FReg, &RISCV::FPR32RegClass);
21371       if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21372         unsigned RegNo = FReg - RISCV::F0_F;
21373         unsigned HReg = RISCV::F0_H + RegNo;
21374         return std::make_pair(HReg, &RISCV::FPR16RegClass);
21375       }
21376     }
21377   }
21378 
21379   if (Subtarget.hasVInstructions()) {
21380     Register VReg = StringSwitch<Register>(Constraint.lower())
21381                         .Case("{v0}", RISCV::V0)
21382                         .Case("{v1}", RISCV::V1)
21383                         .Case("{v2}", RISCV::V2)
21384                         .Case("{v3}", RISCV::V3)
21385                         .Case("{v4}", RISCV::V4)
21386                         .Case("{v5}", RISCV::V5)
21387                         .Case("{v6}", RISCV::V6)
21388                         .Case("{v7}", RISCV::V7)
21389                         .Case("{v8}", RISCV::V8)
21390                         .Case("{v9}", RISCV::V9)
21391                         .Case("{v10}", RISCV::V10)
21392                         .Case("{v11}", RISCV::V11)
21393                         .Case("{v12}", RISCV::V12)
21394                         .Case("{v13}", RISCV::V13)
21395                         .Case("{v14}", RISCV::V14)
21396                         .Case("{v15}", RISCV::V15)
21397                         .Case("{v16}", RISCV::V16)
21398                         .Case("{v17}", RISCV::V17)
21399                         .Case("{v18}", RISCV::V18)
21400                         .Case("{v19}", RISCV::V19)
21401                         .Case("{v20}", RISCV::V20)
21402                         .Case("{v21}", RISCV::V21)
21403                         .Case("{v22}", RISCV::V22)
21404                         .Case("{v23}", RISCV::V23)
21405                         .Case("{v24}", RISCV::V24)
21406                         .Case("{v25}", RISCV::V25)
21407                         .Case("{v26}", RISCV::V26)
21408                         .Case("{v27}", RISCV::V27)
21409                         .Case("{v28}", RISCV::V28)
21410                         .Case("{v29}", RISCV::V29)
21411                         .Case("{v30}", RISCV::V30)
21412                         .Case("{v31}", RISCV::V31)
21413                         .Default(RISCV::NoRegister);
21414     if (VReg != RISCV::NoRegister) {
21415       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21416         return std::make_pair(VReg, &RISCV::VMRegClass);
21417       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21418         return std::make_pair(VReg, &RISCV::VRRegClass);
21419       for (const auto *RC :
21420            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21421         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21422           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21423           return std::make_pair(VReg, RC);
21424         }
21425       }
21426     }
21427   }
21428 
21429   std::pair<Register, const TargetRegisterClass *> Res =
21430       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
21431 
21432   // If we picked one of the Zfinx register classes, remap it to the GPR class.
21433   // FIXME: When Zfinx is supported in CodeGen this will need to take the
21434   // Subtarget into account.
21435   if (Res.second == &RISCV::GPRF16RegClass ||
21436       Res.second == &RISCV::GPRF32RegClass ||
21437       Res.second == &RISCV::GPRPairRegClass)
21438     return std::make_pair(Res.first, &RISCV::GPRRegClass);
21439 
21440   return Res;
21441 }
21442 
21443 InlineAsm::ConstraintCode
21444 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
21445   // Currently only support length 1 constraints.
21446   if (ConstraintCode.size() == 1) {
21447     switch (ConstraintCode[0]) {
21448     case 'A':
21449       return InlineAsm::ConstraintCode::A;
21450     default:
21451       break;
21452     }
21453   }
21454 
21455   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21456 }
21457 
21458 void RISCVTargetLowering::LowerAsmOperandForConstraint(
21459     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21460     SelectionDAG &DAG) const {
21461   // Currently only support length 1 constraints.
21462   if (Constraint.size() == 1) {
21463     switch (Constraint[0]) {
21464     case 'I':
21465       // Validate & create a 12-bit signed immediate operand.
21466       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21467         uint64_t CVal = C->getSExtValue();
21468         if (isInt<12>(CVal))
21469           Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21470                                                     Subtarget.getXLenVT()));
21471       }
21472       return;
21473     case 'J':
21474       // Validate & create an integer zero operand.
21475       if (isNullConstant(Op))
21476         Ops.push_back(
21477             DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21478       return;
21479     case 'K':
21480       // Validate & create a 5-bit unsigned immediate operand.
21481       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21482         uint64_t CVal = C->getZExtValue();
21483         if (isUInt<5>(CVal))
21484           Ops.push_back(
21485               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21486       }
21487       return;
21488     case 'S':
21489       TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
21490       return;
21491     default:
21492       break;
21493     }
21494   }
21495   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21496 }
21497 
21498 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21499                                                    Instruction *Inst,
21500                                                    AtomicOrdering Ord) const {
21501   if (Subtarget.hasStdExtZtso()) {
21502     if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21503       return Builder.CreateFence(Ord);
21504     return nullptr;
21505   }
21506 
21507   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21508     return Builder.CreateFence(Ord);
21509   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
21510     return Builder.CreateFence(AtomicOrdering::Release);
21511   return nullptr;
21512 }
21513 
21514 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21515                                                     Instruction *Inst,
21516                                                     AtomicOrdering Ord) const {
21517   if (Subtarget.hasStdExtZtso()) {
21518     if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21519       return Builder.CreateFence(Ord);
21520     return nullptr;
21521   }
21522 
21523   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
21524     return Builder.CreateFence(AtomicOrdering::Acquire);
21525   if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21526       Ord == AtomicOrdering::SequentiallyConsistent)
21527     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
21528   return nullptr;
21529 }
21530 
21531 TargetLowering::AtomicExpansionKind
21532 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21533   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
21534   // point operations can't be used in an lr/sc sequence without breaking the
21535   // forward-progress guarantee.
21536   if (AI->isFloatingPointOperation() ||
21537       AI->getOperation() == AtomicRMWInst::UIncWrap ||
21538       AI->getOperation() == AtomicRMWInst::UDecWrap ||
21539       AI->getOperation() == AtomicRMWInst::USubCond ||
21540       AI->getOperation() == AtomicRMWInst::USubSat)
21541     return AtomicExpansionKind::CmpXChg;
21542 
21543   // Don't expand forced atomics; we want to have __sync libcalls instead.
21544   if (Subtarget.hasForcedAtomics())
21545     return AtomicExpansionKind::None;
21546 
21547   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21548   if (AI->getOperation() == AtomicRMWInst::Nand) {
21549     if (Subtarget.hasStdExtZacas() &&
21550         (Size >= 32 || Subtarget.hasStdExtZabha()))
21551       return AtomicExpansionKind::CmpXChg;
21552     if (Size < 32)
21553       return AtomicExpansionKind::MaskedIntrinsic;
21554   }
21555 
21556   if (Size < 32 && !Subtarget.hasStdExtZabha())
21557     return AtomicExpansionKind::MaskedIntrinsic;
21558 
21559   return AtomicExpansionKind::None;
21560 }
21561 
21562 static Intrinsic::ID
21563 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
21564   if (XLen == 32) {
21565     switch (BinOp) {
21566     default:
21567       llvm_unreachable("Unexpected AtomicRMW BinOp");
21568     case AtomicRMWInst::Xchg:
21569       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21570     case AtomicRMWInst::Add:
21571       return Intrinsic::riscv_masked_atomicrmw_add_i32;
21572     case AtomicRMWInst::Sub:
21573       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21574     case AtomicRMWInst::Nand:
21575       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21576     case AtomicRMWInst::Max:
21577       return Intrinsic::riscv_masked_atomicrmw_max_i32;
21578     case AtomicRMWInst::Min:
21579       return Intrinsic::riscv_masked_atomicrmw_min_i32;
21580     case AtomicRMWInst::UMax:
21581       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21582     case AtomicRMWInst::UMin:
21583       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21584     }
21585   }
21586 
21587   if (XLen == 64) {
21588     switch (BinOp) {
21589     default:
21590       llvm_unreachable("Unexpected AtomicRMW BinOp");
21591     case AtomicRMWInst::Xchg:
21592       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21593     case AtomicRMWInst::Add:
21594       return Intrinsic::riscv_masked_atomicrmw_add_i64;
21595     case AtomicRMWInst::Sub:
21596       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21597     case AtomicRMWInst::Nand:
21598       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21599     case AtomicRMWInst::Max:
21600       return Intrinsic::riscv_masked_atomicrmw_max_i64;
21601     case AtomicRMWInst::Min:
21602       return Intrinsic::riscv_masked_atomicrmw_min_i64;
21603     case AtomicRMWInst::UMax:
21604       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21605     case AtomicRMWInst::UMin:
21606       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21607     }
21608   }
21609 
21610   llvm_unreachable("Unexpected XLen\n");
21611 }
21612 
21613 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
21614     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21615     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21616   // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21617   // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21618   // mask, as this produces better code than the LR/SC loop emitted by
21619   // int_riscv_masked_atomicrmw_xchg.
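  // For example, an i8 'atomicrmw xchg' of constant 0 becomes an i32
  // 'atomicrmw and' with ~Mask on the aligned word.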
21620   if (AI->getOperation() == AtomicRMWInst::Xchg &&
21621       isa<ConstantInt>(AI->getValOperand())) {
21622     ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21623     if (CVal->isZero())
21624       return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21625                                      Builder.CreateNot(Mask, "Inv_Mask"),
21626                                      AI->getAlign(), Ord);
21627     if (CVal->isMinusOne())
21628       return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21629                                      AI->getAlign(), Ord);
21630   }
21631 
21632   unsigned XLen = Subtarget.getXLen();
21633   Value *Ordering =
21634       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21635   Type *Tys[] = {AlignedAddr->getType()};
21636   Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
21637       AI->getModule(),
21638       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
21639 
21640   if (XLen == 64) {
21641     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21642     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21643     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21644   }
21645 
21646   Value *Result;
21647 
21648   // Must pass the shift amount needed to sign extend the loaded value prior
21649   // to performing a signed comparison for min/max. ShiftAmt is the number of
21650   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21651   // is the number of bits to left+right shift the value in order to
21652   // sign-extend.
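  // For example, with XLen = 32 and an i8 field at ShiftAmt = 16, we pass
  // SextShamt = 32 - 8 - 16 = 8: shifting left by 8 moves the field's sign bit
  // to bit 31, and an arithmetic right shift by 8 then sign-extends the field
  // in place.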
21653   if (AI->getOperation() == AtomicRMWInst::Min ||
21654       AI->getOperation() == AtomicRMWInst::Max) {
21655     const DataLayout &DL = AI->getDataLayout();
21656     unsigned ValWidth =
21657         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21658     Value *SextShamt =
21659         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21660     Result = Builder.CreateCall(LrwOpScwLoop,
21661                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21662   } else {
21663     Result =
21664         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21665   }
21666 
21667   if (XLen == 64)
21668     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21669   return Result;
21670 }
21671 
21672 TargetLowering::AtomicExpansionKind
21673 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
21674     AtomicCmpXchgInst *CI) const {
21675   // Don't expand forced atomics; we want to have __sync libcalls instead.
21676   if (Subtarget.hasForcedAtomics())
21677     return AtomicExpansionKind::None;
21678 
21679   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
21680   if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21681       (Size == 8 || Size == 16))
21682     return AtomicExpansionKind::MaskedIntrinsic;
21683   return AtomicExpansionKind::None;
21684 }
21685 
21686 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
21687     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21688     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21689   unsigned XLen = Subtarget.getXLen();
21690   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21691   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21692   if (XLen == 64) {
21693     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21694     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21695     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21696     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21697   }
21698   Type *Tys[] = {AlignedAddr->getType()};
21699   Value *Result = Builder.CreateIntrinsic(
21700       CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21701   if (XLen == 64)
21702     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21703   return Result;
21704 }
21705 
21706 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
21707                                                         EVT DataVT) const {
21708   // We have indexed loads for all supported EEW types. Indices are always
21709   // zero extended.
21710   return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21711          isTypeLegal(Extend.getValueType()) &&
21712          isTypeLegal(Extend.getOperand(0).getValueType()) &&
21713          Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21714 }
21715 
21716 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
21717                                                EVT VT) const {
21718   if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21719     return false;
21720 
21721   switch (FPVT.getSimpleVT().SimpleTy) {
21722   case MVT::f16:
21723     return Subtarget.hasStdExtZfhmin();
21724   case MVT::f32:
21725     return Subtarget.hasStdExtF();
21726   case MVT::f64:
21727     return Subtarget.hasStdExtD();
21728   default:
21729     return false;
21730   }
21731 }
21732 
21733 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
21734   // If we are using the small code model, we can reduce the size of each
21735   // jump table entry to 4 bytes.
21736   if (Subtarget.is64Bit() && !isPositionIndependent() &&
21737       getTargetMachine().getCodeModel() == CodeModel::Small) {
21738     return MachineJumpTableInfo::EK_Custom32;
21739   }
21740   return TargetLowering::getJumpTableEncoding();
21741 }
21742 
21743 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21744     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21745     unsigned uid, MCContext &Ctx) const {
21746   assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21747          getTargetMachine().getCodeModel() == CodeModel::Small);
21748   return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21749 }
21750 
21751 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
21752   // We define vscale to be VLEN/RVVBitsPerBlock.  VLEN is always a power
21753   // of two >= 64, and RVVBitsPerBlock is 64.  Thus, vscale must be
21754   // a power of two as well.
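  // For example, VLEN = 128 gives vscale = 128 / 64 = 2, a power of two.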
21755   // FIXME: This doesn't work for zve32, but that's already broken
21756   // elsewhere for the same reason.
21757   assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21758   static_assert(RISCV::RVVBitsPerBlock == 64,
21759                 "RVVBitsPerBlock changed, audit needed");
21760   return true;
21761 }
21762 
21763 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
21764                                                  SDValue &Offset,
21765                                                  ISD::MemIndexedMode &AM,
21766                                                  SelectionDAG &DAG) const {
21767   // Target does not support indexed loads.
21768   if (!Subtarget.hasVendorXTHeadMemIdx())
21769     return false;
21770 
21771   if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21772     return false;
21773 
21774   Base = Op->getOperand(0);
21775   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21776     int64_t RHSC = RHS->getSExtValue();
21777     if (Op->getOpcode() == ISD::SUB)
21778       RHSC = -(uint64_t)RHSC;
21779 
21780     // The constants that can be encoded in the THeadMemIdx instructions
21781     // are of the form (sign_extend(imm5) << imm2).
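    // For example, with imm2 = 3 that covers multiples of 8 in [-128, 120].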
21782     bool isLegalIndexedOffset = false;
21783     for (unsigned i = 0; i < 4; i++)
21784       if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21785         isLegalIndexedOffset = true;
21786         break;
21787       }
21788 
21789     if (!isLegalIndexedOffset)
21790       return false;
21791 
21792     Offset = Op->getOperand(1);
21793     return true;
21794   }
21795 
21796   return false;
21797 }
21798 
21799 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
21800                                                     SDValue &Offset,
21801                                                     ISD::MemIndexedMode &AM,
21802                                                     SelectionDAG &DAG) const {
21803   EVT VT;
21804   SDValue Ptr;
21805   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21806     VT = LD->getMemoryVT();
21807     Ptr = LD->getBasePtr();
21808   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21809     VT = ST->getMemoryVT();
21810     Ptr = ST->getBasePtr();
21811   } else
21812     return false;
21813 
21814   if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21815     return false;
21816 
21817   AM = ISD::PRE_INC;
21818   return true;
21819 }
21820 
21821 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
21822                                                      SDValue &Base,
21823                                                      SDValue &Offset,
21824                                                      ISD::MemIndexedMode &AM,
21825                                                      SelectionDAG &DAG) const {
21826   if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21827     if (Op->getOpcode() != ISD::ADD)
21828       return false;
21829 
21830     if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21831       Base = LS->getBasePtr();
21832     else
21833       return false;
21834 
21835     if (Base == Op->getOperand(0))
21836       Offset = Op->getOperand(1);
21837     else if (Base == Op->getOperand(1))
21838       Offset = Op->getOperand(0);
21839     else
21840       return false;
21841 
21842     AM = ISD::POST_INC;
21843     return true;
21844   }
21845 
21846   EVT VT;
21847   SDValue Ptr;
21848   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21849     VT = LD->getMemoryVT();
21850     Ptr = LD->getBasePtr();
21851   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21852     VT = ST->getMemoryVT();
21853     Ptr = ST->getBasePtr();
21854   } else
21855     return false;
21856 
21857   if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21858     return false;
21859   // Post-indexing updates the base, so it's not a valid transform
21860   // if that's not the same as the load's pointer.
21861   if (Ptr != Base)
21862     return false;
21863 
21864   AM = ISD::POST_INC;
21865   return true;
21866 }
21867 
21868 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
21869                                                      EVT VT) const {
21870   EVT SVT = VT.getScalarType();
21871 
21872   if (!SVT.isSimple())
21873     return false;
21874 
21875   switch (SVT.getSimpleVT().SimpleTy) {
21876   case MVT::f16:
21877     return VT.isVector() ? Subtarget.hasVInstructionsF16()
21878                          : Subtarget.hasStdExtZfhOrZhinx();
21879   case MVT::f32:
21880     return Subtarget.hasStdExtFOrZfinx();
21881   case MVT::f64:
21882     return Subtarget.hasStdExtDOrZdinx();
21883   default:
21884     break;
21885   }
21886 
21887   return false;
21888 }
21889 
21890 ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
21891   // Zacas will use amocas.w which does not require extension.
21892   return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21893 }
21894 
21895 Register RISCVTargetLowering::getExceptionPointerRegister(
21896     const Constant *PersonalityFn) const {
21897   return RISCV::X10;
21898 }
21899 
21900 Register RISCVTargetLowering::getExceptionSelectorRegister(
21901     const Constant *PersonalityFn) const {
21902   return RISCV::X11;
21903 }
21904 
21905 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
21906   // Return false to suppress the unnecessary extensions if the LibCall
21907   // argument or return value is a float narrower than XLEN on a soft FP ABI.
21908   if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21909                                   Type.getSizeInBits() < Subtarget.getXLen()))
21910     return false;
21911 
21912   return true;
21913 }
21914 
21915 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
21916                                                         bool IsSigned) const {
21917   if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
21918     return true;
21919 
21920   return IsSigned;
21921 }
21922 
21923 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
21924                                                  SDValue C) const {
21925   // Check integral scalar types.
21926   if (!VT.isScalarInteger())
21927     return false;
21928 
21929   // Omit the optimization if the subtarget has the Zmmul extension and the
21930   // data size exceeds XLen.
21931   const bool HasZmmul = Subtarget.hasStdExtZmmul();
21932   if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21933     return false;
21934 
21935   auto *ConstNode = cast<ConstantSDNode>(C);
21936   const APInt &Imm = ConstNode->getAPIntValue();
21937 
21938   // Break the MUL to a SLLI and an ADD/SUB.
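  // For example, x * 9 becomes (x << 3) + x, and x * 7 becomes (x << 3) - x.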
21939   if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21940       (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21941     return true;
21942 
21943   // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
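  // For example, x * 4100 (= 4096 + 4) becomes SH2ADD(x, SLLI(x, 12)).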
21944   if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21945       ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21946        (Imm - 8).isPowerOf2()))
21947     return true;
21948 
21949   // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21950   // a pair of LUI/ADDI.
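  // For example, x * 6144 (= 3 << 11) becomes ((x << 1) + x) << 11.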
21951   if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21952       ConstNode->hasOneUse()) {
21953     APInt ImmS = Imm.ashr(Imm.countr_zero());
21954     if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21955         (1 - ImmS).isPowerOf2())
21956       return true;
21957   }
21958 
21959   return false;
21960 }
21961 
21962 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
21963                                                       SDValue ConstNode) const {
21964   // Let the DAGCombiner decide for vectors.
21965   EVT VT = AddNode.getValueType();
21966   if (VT.isVector())
21967     return true;
21968 
21969   // Let the DAGCombiner decide for larger types.
21970   if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21971     return true;
21972 
21973   // It is worse if c1 is simm12 while c1*c2 is not.
21974   ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21975   ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21976   const APInt &C1 = C1Node->getAPIntValue();
21977   const APInt &C2 = C2Node->getAPIntValue();
21978   if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21979     return false;
21980 
21981   // Default to true and let the DAGCombiner decide.
21982   return true;
21983 }
21984 
21985 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
21986     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21987     unsigned *Fast) const {
21988   if (!VT.isVector()) {
21989     if (Fast)
21990       *Fast = Subtarget.enableUnalignedScalarMem();
21991     return Subtarget.enableUnalignedScalarMem();
21992   }
21993 
21994   // All vector implementations must support element alignment
21995   EVT ElemVT = VT.getVectorElementType();
21996   if (Alignment >= ElemVT.getStoreSize()) {
21997     if (Fast)
21998       *Fast = 1;
21999     return true;
22000   }
22001 
22002   // Note: We lower an unmasked unaligned vector access to an equally sized
22003   // e8 element type access.  Given this, we effectively support all unmasked
22004   // misaligned accesses.  TODO: Work through the codegen implications of
22005   // allowing such accesses to be formed, and considered fast.
22006   if (Fast)
22007     *Fast = Subtarget.enableUnalignedVectorMem();
22008   return Subtarget.enableUnalignedVectorMem();
22009 }
22010 
22012 EVT RISCVTargetLowering::getOptimalMemOpType(
22013     const MemOp &Op, const AttributeList &FuncAttributes) const {
22014   if (!Subtarget.hasVInstructions())
22015     return MVT::Other;
22016 
22017   if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
22018     return MVT::Other;
22019 
22020   // We use LMUL1 memory operations here for a non-obvious reason.  Our caller
22021   // has an expansion threshold, and we want the number of hardware memory
22022   // operations to correspond roughly to that threshold.  LMUL>1 operations
22023   // are typically expanded linearly internally, and thus correspond to more
22024   // than one actual memory operation.  Note that store merging and load
22025   // combining will typically form larger LMUL operations from the LMUL1
22026   // operations emitted here, and that's okay because combining isn't
22027   // introducing new memory operations; it's just merging existing ones.
22028   const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
22029   if (Op.size() < MinVLenInBytes)
22030     // TODO: Figure out short memops.  For the moment, do the default thing
22031     // which ends up using scalar sequences.
22032     return MVT::Other;
22033 
22034   // If the minimum VLEN does not exceed RISCV::RVVBitsPerBlock, we don't
22035   // support fixed vectors.
22036   if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
22037     return MVT::Other;
22038 
22039   // Prefer i8 for non-zero memset as it allows us to avoid materializing
22040   // a large scalar constant and instead use vmv.v.x/i to do the
22041   // broadcast.  For everything else, prefer ELenVT to minimize VL and thus
22042   // maximize the chance we can encode the size in the vsetvli.
22043   MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
22044   MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
22045 
22046   // Do we have sufficient alignment for our preferred VT?  If not, revert
22047   // to largest size allowed by our alignment criteria.
22048   if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
22049     Align RequiredAlign(PreferredVT.getStoreSize());
22050     if (Op.isFixedDstAlign())
22051       RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
22052     if (Op.isMemcpy())
22053       RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
22054     PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
22055   }
22056   return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
22057 }
22058 
22059 bool RISCVTargetLowering::splitValueIntoRegisterParts(
22060     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
22061     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
22062   bool IsABIRegCopy = CC.has_value();
22063   EVT ValueVT = Val.getValueType();
22064 
22065   MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22066   if ((ValueVT == PairVT ||
22067        (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22068         ValueVT == MVT::f64)) &&
22069       NumParts == 1 && PartVT == MVT::Untyped) {
22070     // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
22071     MVT XLenVT = Subtarget.getXLenVT();
22072     if (ValueVT == MVT::f64)
22073       Val = DAG.getBitcast(MVT::i64, Val);
22074     auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
22075     // Always creating an MVT::Untyped part, so always use
22076     // RISCVISD::BuildGPRPair.
22077     Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
22078     return true;
22079   }
22080 
22081   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22082       PartVT == MVT::f32) {
22083     // Cast the [b]f16 to i16, extend to i32, pad the upper 16 bits with ones
22084     // so the value is NaN-boxed, and cast the result to f32.
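    // For example, the f16 value 1.0 (0x3C00) becomes the f32 bit pattern
    // 0xFFFF3C00, a NaN whose low 16 bits still hold the original half value.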
22085     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
22086     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
22087     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
22088                       DAG.getConstant(0xFFFF0000, DL, MVT::i32));
22089     Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22090     Parts[0] = Val;
22091     return true;
22092   }
22093 
22094   if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
22095 #ifndef NDEBUG
22096     unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
22097     [[maybe_unused]] unsigned ValLMUL =
22098         divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
22099                    ValNF * RISCV::RVVBitsPerBlock);
22100     unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
22101     [[maybe_unused]] unsigned PartLMUL =
22102         divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
22103                    PartNF * RISCV::RVVBitsPerBlock);
22104     assert(ValNF == PartNF && ValLMUL == PartLMUL &&
22105            "RISC-V vector tuple type only accepts same register class type "
22106            "TUPLE_INSERT");
22107 #endif
22108 
22109     Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
22110                       Val, DAG.getVectorIdxConstant(0, DL));
22111     Parts[0] = Val;
22112     return true;
22113   }
22114 
22115   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22116     LLVMContext &Context = *DAG.getContext();
22117     EVT ValueEltVT = ValueVT.getVectorElementType();
22118     EVT PartEltVT = PartVT.getVectorElementType();
22119     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22120     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22121     if (PartVTBitSize % ValueVTBitSize == 0) {
22122       assert(PartVTBitSize >= ValueVTBitSize);
22123       // If the element types are different, bitcast to the same element type of
22124       // PartVT first.
22125       // For example, to copy a <vscale x 1 x i8> value into
22126       // <vscale x 4 x i16>, we first widen it to <vscale x 8 x i8> with
22127       // INSERT_SUBVECTOR and then bitcast the result to
22128       // <vscale x 4 x i16>.
22129       if (ValueEltVT != PartEltVT) {
22130         if (PartVTBitSize > ValueVTBitSize) {
22131           unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22132           assert(Count != 0 && "The number of element should not be zero.");
22133           EVT SameEltTypeVT =
22134               EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22135           Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
22136                             DAG.getUNDEF(SameEltTypeVT), Val,
22137                             DAG.getVectorIdxConstant(0, DL));
22138         }
22139         Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22140       } else {
22141         Val =
22142             DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
22143                         Val, DAG.getVectorIdxConstant(0, DL));
22144       }
22145       Parts[0] = Val;
22146       return true;
22147     }
22148   }
22149 
22150   return false;
22151 }
22152 
22153 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
22154     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
22155     MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
22156   bool IsABIRegCopy = CC.has_value();
22157 
22158   MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22159   if ((ValueVT == PairVT ||
22160        (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22161         ValueVT == MVT::f64)) &&
22162       NumParts == 1 && PartVT == MVT::Untyped) {
22163     // Pairs in inline assembly; f64 in inline assembly on rv32_zdinx
22164     MVT XLenVT = Subtarget.getXLenVT();
22165 
22166     SDValue Val = Parts[0];
22167     // Always starting with an MVT::Untyped part, so always use
22168     // RISCVISD::SplitGPRPair
22169     Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
22170                       Val);
22171     Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
22172                       Val.getValue(1));
22173     if (ValueVT == MVT::f64)
22174       Val = DAG.getBitcast(ValueVT, Val);
22175     return Val;
22176   }
22177 
22178   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22179       PartVT == MVT::f32) {
22180     SDValue Val = Parts[0];
22181 
22182     // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
22183     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
22184     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
22185     Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
22186     return Val;
22187   }
22188 
22189   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22190     LLVMContext &Context = *DAG.getContext();
22191     SDValue Val = Parts[0];
22192     EVT ValueEltVT = ValueVT.getVectorElementType();
22193     EVT PartEltVT = PartVT.getVectorElementType();
22194     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22195     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22196     if (PartVTBitSize % ValueVTBitSize == 0) {
22197       assert(PartVTBitSize >= ValueVTBitSize);
22198       EVT SameEltTypeVT = ValueVT;
22199       // If the element types are different, convert it to the same element type
22200       // of PartVT.
22201       // For example, to copy a <vscale x 1 x i8> value out of
22202       // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
22203       // <vscale x 8 x i8> and then extract the <vscale x 1 x i8>
22204       // subvector.
22205       if (ValueEltVT != PartEltVT) {
22206         unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22207         assert(Count != 0 && "The number of element should not be zero.");
22208         SameEltTypeVT =
22209             EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22210         Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
22211       }
22212       Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
22213                         DAG.getVectorIdxConstant(0, DL));
22214       return Val;
22215     }
22216   }
22217   return SDValue();
22218 }
22219 
22220 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
22221   // When aggressively optimizing for code size, we prefer to use a div
22222   // instruction, as it is usually smaller than the alternative sequence.
22223   // TODO: Add vector division?
22224   bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
22225   return OptSize && !VT.isVector();
22226 }
22227 
22228 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
22229   // Scalarizing zero_extend and sign_extend can prevent them from matching
22230   // widening instructions in some situations.
22231   unsigned Opc = N->getOpcode();
22232   if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
22233     return false;
22234   return true;
22235 }
22236 
22237 static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
22238   Module *M = IRB.GetInsertBlock()->getModule();
22239   Function *ThreadPointerFunc =
22240       Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
22241   return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
22242                                 IRB.CreateCall(ThreadPointerFunc), Offset);
22243 }
22244 
22245 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
22246   // Fuchsia provides a fixed TLS slot for the stack cookie.
22247   // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
22248   if (Subtarget.isTargetFuchsia())
22249     return useTpOffset(IRB, -0x10);
22250 
22251   // Android provides a fixed TLS slot for the stack cookie. See the definition
22252   // of TLS_SLOT_STACK_GUARD in
22253   // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
22254   if (Subtarget.isTargetAndroid())
22255     return useTpOffset(IRB, -0x18);
22256 
22257   Module *M = IRB.GetInsertBlock()->getModule();
22258 
22259   if (M->getStackProtectorGuard() == "tls") {
22260     // Users must specify the offset explicitly
22261     int Offset = M->getStackProtectorGuardOffset();
22262     return useTpOffset(IRB, Offset);
22263   }
22264 
22265   return TargetLowering::getIRStackGuard(IRB);
22266 }
22267 
22268 bool RISCVTargetLowering::isLegalInterleavedAccessType(
22269     VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
22270     const DataLayout &DL) const {
22271   EVT VT = getValueType(DL, VTy);
22272   // Don't lower vlseg/vsseg for vector types that can't be split.
22273   if (!isTypeLegal(VT))
22274     return false;
22275 
22276   if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
22277       !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
22278                                       Alignment))
22279     return false;
22280 
22281   MVT ContainerVT = VT.getSimpleVT();
22282 
22283   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
22284     if (!Subtarget.useRVVForFixedLengthVectors())
22285       return false;
22286     // Sometimes the interleaved access pass picks up splats as interleaves of
22287     // one element. Don't lower these.
22288     if (FVTy->getNumElements() < 2)
22289       return false;
22290 
22291     ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
22292   } else {
22293     // The intrinsics for scalable vectors are not overloaded on pointer type
22294     // and can only handle the default address space.
22295     if (AddrSpace)
22296       return false;
22297   }
22298 
22299   // Need to make sure that EMUL * NFIELDS ≤ 8
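  // For example, Factor = 4 at LMUL = 2 gives 8 and is allowed, while
  // Factor = 8 at LMUL = 2 gives 16 and is rejected.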
22300   auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
22301   if (Fractional)
22302     return true;
22303   return Factor * LMUL <= 8;
22304 }
22305 
22306 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
22307                                                   Align Alignment) const {
22308   if (!Subtarget.hasVInstructions())
22309     return false;
22310 
22311   // Only support fixed vectors if we know the minimum vector size.
22312   if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
22313     return false;
22314 
22315   EVT ScalarType = DataType.getScalarType();
22316   if (!isLegalElementTypeForRVV(ScalarType))
22317     return false;
22318 
22319   if (!Subtarget.enableUnalignedVectorMem() &&
22320       Alignment < ScalarType.getStoreSize())
22321     return false;
22322 
22323   return true;
22324 }
22325 
22326 static const Intrinsic::ID FixedVlsegIntrIds[] = {
22327     Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
22328     Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
22329     Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
22330     Intrinsic::riscv_seg8_load};
22331 
22332 /// Lower an interleaved load into a vlsegN intrinsic.
22333 ///
22334 /// E.g. Lower an interleaved load (Factor = 2):
22335 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
22336 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
22337 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
22338 ///
22339 /// Into:
22340 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
22341 ///                                        %ptr, i64 4)
22342 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
22343 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
22344 bool RISCVTargetLowering::lowerInterleavedLoad(
22345     LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
22346     ArrayRef<unsigned> Indices, unsigned Factor) const {
22347   assert(Indices.size() == Shuffles.size());
22348 
22349   IRBuilder<> Builder(LI);
22350 
22351   auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22352   if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22353                                     LI->getPointerAddressSpace(),
22354                                     LI->getDataLayout()))
22355     return false;
22356 
22357   auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22358 
22359   // If the segment load is going to be performed segment at a time anyway
22360   // and there's only one element used, use a strided load instead.  This
22361   // will be equally fast and create less vector register pressure.
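  // For example, with Factor = 4 on <8 x i32> and only index 1 live, we emit
  // a strided load with a 16-byte stride starting at ptr + 4.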
22362   if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22363     unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22364     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22365     Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22366     Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22367     Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22368     Value *VL = Builder.getInt32(VTy->getNumElements());
22369 
22370     CallInst *CI =
22371         Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22372                                 {VTy, BasePtr->getType(), Stride->getType()},
22373                                 {BasePtr, Stride, Mask, VL});
22374     CI->addParamAttr(
22375         0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign()));
22376     Shuffles[0]->replaceAllUsesWith(CI);
22377     return true;
22378   }
22379 
22380   Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22381 
22382   CallInst *VlsegN = Builder.CreateIntrinsic(
22383       FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22384       {LI->getPointerOperand(), VL});
22385 
22386   for (unsigned i = 0; i < Shuffles.size(); i++) {
22387     Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22388     Shuffles[i]->replaceAllUsesWith(SubVec);
22389   }
22390 
22391   return true;
22392 }
22393 
22394 static const Intrinsic::ID FixedVssegIntrIds[] = {
22395     Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22396     Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22397     Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22398     Intrinsic::riscv_seg8_store};
22399 
22400 /// Lower an interleaved store into a vssegN intrinsic.
22401 ///
22402 /// E.g. Lower an interleaved store (Factor = 3):
22403 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22404 ///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22405 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
22406 ///
22407 /// Into:
22408 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22409 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22410 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22411 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22412 ///                                              %ptr, i32 4)
22413 ///
22414 /// Note that the new shufflevectors will be removed and we'll only generate one
22415 /// vsseg3 instruction in CodeGen.
22416 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
22417                                                 ShuffleVectorInst *SVI,
22418                                                 unsigned Factor) const {
22419   IRBuilder<> Builder(SI);
22420   auto Mask = SVI->getShuffleMask();
22421   auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22422   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22423   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22424                                    ShuffleVTy->getNumElements() / Factor);
22425   if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22426                                     SI->getPointerAddressSpace(),
22427                                     SI->getDataLayout()))
22428     return false;
22429 
22430   auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22431 
22432   unsigned Index;
22433   // If the segment store only has one active lane (i.e. the interleave is
22434   // just a spread shuffle), we can use a strided store instead.  This will
22435   // be equally fast, and create less vector register pressure.
22436   if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22437       isSpreadMask(Mask, Factor, Index)) {
22438     unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22439     Value *Data = SVI->getOperand(0);
22440     auto *DataVTy = cast<FixedVectorType>(Data->getType());
22441     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22442     Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22443     Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22444     Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22445     Value *VL = Builder.getInt32(VTy->getNumElements());
22446 
22447     CallInst *CI = Builder.CreateIntrinsic(
22448         Intrinsic::experimental_vp_strided_store,
22449         {Data->getType(), BasePtr->getType(), Stride->getType()},
22450         {Data, BasePtr, Stride, Mask, VL});
22451     CI->addParamAttr(
22452         1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22453 
22454     return true;
22455   }
22456 
22457   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
22458       SI->getModule(), FixedVssegIntrIds[Factor - 2],
22459       {VTy, SI->getPointerOperandType(), XLenTy});
22460 
22461   SmallVector<Value *, 10> Ops;
22462 
22463   for (unsigned i = 0; i < Factor; i++) {
22464     Value *Shuffle = Builder.CreateShuffleVector(
22465         SVI->getOperand(0), SVI->getOperand(1),
22466         createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22467     Ops.push_back(Shuffle);
22468   }
22469   // This VL should be OK (should be executable in one vsseg instruction,
22470   // potentially under larger LMULs) because we checked that the fixed vector
22471   // type fits in isLegalInterleavedAccessType.
22472   Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22473   Ops.append({SI->getPointerOperand(), VL});
22474 
22475   Builder.CreateCall(VssegNFunc, Ops);
22476 
22477   return true;
22478 }
22479 
22480 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
22481     LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
22482   unsigned Factor = DeinterleaveValues.size();
22483   if (Factor > 8)
22484     return false;
22485 
22486   assert(LI->isSimple());
22487   IRBuilder<> Builder(LI);
22488 
22489   auto *ResVTy = cast<VectorType>(DeinterleaveValues[0]->getType());
22490 
22491   const DataLayout &DL = LI->getDataLayout();
22492 
22493   if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22494                                     LI->getPointerAddressSpace(), DL))
22495     return false;
22496 
22497   Value *Return;
22498   Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22499 
22500   if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22501     Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22502     Return =
22503         Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22504                                 {ResVTy, LI->getPointerOperandType(), XLenTy},
22505                                 {LI->getPointerOperand(), VL});
22506   } else {
22507     static const Intrinsic::ID IntrIds[] = {
22508         Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22509         Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22510         Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22511         Intrinsic::riscv_vlseg8};
22512 
22513     unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
22514     unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22515     Type *VecTupTy = TargetExtType::get(
22516         LI->getContext(), "riscv.vector.tuple",
22517         ScalableVectorType::get(Type::getInt8Ty(LI->getContext()),
22518                                 NumElts * SEW / 8),
22519         Factor);
22520 
22521     Value *VL = Constant::getAllOnesValue(XLenTy);
22522 
22523     Value *Vlseg = Builder.CreateIntrinsic(
22524         IntrIds[Factor - 2], {VecTupTy, XLenTy},
22525         {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22526          ConstantInt::get(XLenTy, Log2_64(SEW))});
22527 
22528     SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22529     Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22530     for (unsigned i = 0; i < Factor; ++i) {
22531       Value *VecExtract = Builder.CreateIntrinsic(
22532           Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22533           {Vlseg, Builder.getInt32(i)});
22534       Return = Builder.CreateInsertValue(Return, VecExtract, i);
22535     }
22536   }
22537 
22538   for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
22539     // We have to create a brand new ExtractValue to replace each
22540     // of these old ExtractValue instructions.
22541     Value *NewEV =
22542         Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
22543     DIV->replaceAllUsesWith(NewEV);
22544   }
22545 
22546   return true;
22547 }
22548 
22549 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
22550     StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
22551   unsigned Factor = InterleaveValues.size();
22552   if (Factor > 8)
22553     return false;
22554 
22555   assert(SI->isSimple());
22556   IRBuilder<> Builder(SI);
22557 
22558   auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
22559   const DataLayout &DL = SI->getDataLayout();
22560 
22561   if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22562                                     SI->getPointerAddressSpace(), DL))
22563     return false;
22564 
22565   Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22566 
22567   if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22568     Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
22569         SI->getModule(), FixedVssegIntrIds[Factor - 2],
22570         {InVTy, SI->getPointerOperandType(), XLenTy});
22571 
22572     SmallVector<Value *, 10> Ops(InterleaveValues.begin(),
22573                                  InterleaveValues.end());
22574     Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22575     Ops.append({SI->getPointerOperand(), VL});
22576 
22577     Builder.CreateCall(VssegNFunc, Ops);
22578   } else {
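    // Scalable-vector path: pack the parts into a riscv.vector.tuple value
    // with riscv.tuple.insert and store it with the tuple-based vssegN
    // intrinsic.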
22579     static const Intrinsic::ID IntrIds[] = {
22580         Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22581         Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22582         Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22583         Intrinsic::riscv_vsseg8};
22584 
22585     unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
22586     unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22587     Type *VecTupTy = TargetExtType::get(
22588         SI->getContext(), "riscv.vector.tuple",
22589         ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22590                                 NumElts * SEW / 8),
22591         Factor);
22592 
22593     Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
22594         SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22595 
22596     Value *VL = Constant::getAllOnesValue(XLenTy);
22597 
22598     Value *StoredVal = PoisonValue::get(VecTupTy);
22599     for (unsigned i = 0; i < Factor; ++i)
22600       StoredVal = Builder.CreateIntrinsic(
22601           Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22602           {StoredVal, InterleaveValues[i], Builder.getInt32(i)});
22603 
22604     Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22605                                     ConstantInt::get(XLenTy, Log2_64(SEW))});
22606   }
22607 
22608   return true;
22609 }
22610 
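// Emit a KCFI_CHECK pseudo in front of an indirect call or tail call that
// carries a CFI type id. The pseudo is expanded later (outside this function)
// into the actual type-hash comparison against the callee.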
22611 MachineInstr *
22612 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
22613                                    MachineBasicBlock::instr_iterator &MBBI,
22614                                    const TargetInstrInfo *TII) const {
22615   assert(MBBI->isCall() && MBBI->getCFIType() &&
22616          "Invalid call instruction for a KCFI check");
22617   assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22618                       MBBI->getOpcode()));
22619 
22620   MachineOperand &Target = MBBI->getOperand(0);
22621   Target.setIsRenamable(false);
22622 
22623   return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
22624       .addReg(Target.getReg())
22625       .addImm(MBBI->getCFIType())
22626       .getInstr();
22627 }
22628 
22629 #define GET_REGISTER_MATCHER
22630 #include "RISCVGenAsmMatcher.inc"
22631 
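// Resolve a register by name (e.g. for llvm.read_register/llvm.write_register).
// Only registers that are reserved (by the target or by the user) may be
// requested; anything else is a fatal error.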
22632 Register
22633 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
22634                                        const MachineFunction &MF) const {
22635   Register Reg = MatchRegisterAltName(RegName);
22636   if (Reg == RISCV::NoRegister)
22637     Reg = MatchRegisterName(RegName);
22638   if (Reg == RISCV::NoRegister)
22639     report_fatal_error(
22640         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
22641   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22642   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22643     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22644                              StringRef(RegName) + "\"."));
22645   return Reg;
22646 }
22647 
22648 MachineMemOperand::Flags
22649 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
22650   const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
22651 
22652   if (NontemporalInfo == nullptr)
22653     return MachineMemOperand::MONone;
22654 
22655   // 1 (the default value) works as __RISCV_NTLH_ALL
22656   // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22657   // 3 -> __RISCV_NTLH_ALL_PRIVATE
22658   // 4 -> __RISCV_NTLH_INNERMOST_SHARED
22659   // 5 -> __RISCV_NTLH_ALL
22660   int NontemporalLevel = 5;
22661   const MDNode *RISCVNontemporalInfo =
22662       I.getMetadata("riscv-nontemporal-domain");
22663   if (RISCVNontemporalInfo != nullptr)
22664     NontemporalLevel =
22665         cast<ConstantInt>(
22666             cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22667                 ->getValue())
22668             ->getZExtValue();
22669 
22670   assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22671          "RISC-V target doesn't support this non-temporal domain.");
22672 
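  // Map levels 2..5 onto the two nontemporal MMO flag bits: after subtracting
  // 2, bit 0 and bit 1 of the result select MONontemporalBit0/1 (e.g.
  // __RISCV_NTLH_ALL (5) -> 0b11 sets both bits).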
22673   NontemporalLevel -= 2;
22674   MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
22675   if (NontemporalLevel & 0b1)
22676     Flags |= MONontemporalBit0;
22677   if (NontemporalLevel & 0b10)
22678     Flags |= MONontemporalBit1;
22679 
22680   return Flags;
22681 }
22682 
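// Only the two nontemporal bits are treated as target-specific MMO flags here.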
22683 MachineMemOperand::Flags
22684 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
22685 
22686   MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22687   MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
22688   TargetFlags |= (NodeFlags & MONontemporalBit0);
22689   TargetFlags |= (NodeFlags & MONontemporalBit1);
22690   return TargetFlags;
22691 }
22692 
22693 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
22694     const MemSDNode &NodeX, const MemSDNode &NodeY) const {
22695   return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
22696 }
22697 
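// CTPOP is considered fast when Zvbb supplies vcpop.v for vector types or Zbb
// supplies cpop/cpopw for scalar i32/i64.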
22698 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
22699   if (VT.isScalableVector())
22700     return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22701   if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22702     return true;
22703   return Subtarget.hasStdExtZbb() &&
22704          (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22705 }
22706 
22707 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
22708                                                  ISD::CondCode Cond) const {
22709   return isCtpopFast(VT) ? 0 : 1;
22710 }
22711 
22712 bool RISCVTargetLowering::shouldInsertFencesForAtomic(
22713     const Instruction *I) const {
22714   if (Subtarget.hasStdExtZalasr()) {
22715     if (Subtarget.hasStdExtZtso()) {
22716       // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22717       // should be lowered to plain load/store. The easiest way to do this is
22718       // to say we should insert fences for them, and the fence insertion code
22719       // will then simply not insert any fences.
22720       auto *LI = dyn_cast<LoadInst>(I);
22721       auto *SI = dyn_cast<StoreInst>(I);
22722       if ((LI &&
22723            (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
22724           (SI &&
22725            (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
22726         // This is a seq_cst load or store, which needs a .aq or .rl, so we
22727         // shouldn't try to insert fences.
22728         return false;
22729       }
22730       // Otherwise, under TSO, this is a load/store that isn't seq_cst.
22731       return isa<LoadInst>(I) || isa<StoreInst>(I);
22732     }
22733     return false;
22734   }
22735   // Note that one specific case requires fence insertion for an
22736   // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22737   // than this hook due to limitations in the interface here.
22738   return isa<LoadInst>(I) || isa<StoreInst>(I);
22739 }
22740 
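// Decide whether GlobalISel should fall back to SelectionDAG for this
// instruction; in particular, anything producing or consuming scalable vector
// types (other than returns) is not supported yet.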
22741 bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
22742 
22743   // GISel support is in progress or complete for these opcodes.
22744   unsigned Op = Inst.getOpcode();
22745   if (Op == Instruction::Add || Op == Instruction::Sub ||
22746       Op == Instruction::And || Op == Instruction::Or ||
22747       Op == Instruction::Xor || Op == Instruction::InsertElement ||
22748       Op == Instruction::ShuffleVector || Op == Instruction::Load ||
22749       Op == Instruction::Freeze || Op == Instruction::Store)
22750     return false;
22751 
22752   if (Inst.getType()->isScalableTy())
22753     return true;
22754 
22755   for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22756     if (Inst.getOperand(i)->getType()->isScalableTy() &&
22757         !isa<ReturnInst>(&Inst))
22758       return true;
22759 
22760   if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22761     if (AI->getAllocatedType()->isScalableTy())
22762       return true;
22763   }
22764 
22765   return false;
22766 }
22767 
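// Build an sdiv by a power of two using the conditional-move based expansion
// when the short forward branch optimization is available. Roughly (a sketch,
// for a divisor of 2^k with 2^k-1 < 2048):
//
//   tmp = (x < 0) ? x + (2^k - 1) : x   // one addi guarded by a short
//                                       // forward branch
//   res = tmp >> k                      // arithmetic shift right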
22768 SDValue
22769 RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22770                                    SelectionDAG &DAG,
22771                                    SmallVectorImpl<SDNode *> &Created) const {
22772   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
22773   if (isIntDivCheap(N->getValueType(0), Attr))
22774     return SDValue(N, 0); // Lower SDIV as SDIV
22775 
22776   // Only perform this transform if short forward branch opt is supported.
22777   if (!Subtarget.hasShortForwardBranchOpt())
22778     return SDValue();
22779   EVT VT = N->getValueType(0);
22780   if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22781     return SDValue();
22782 
22783   // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
22784   if (Divisor.sgt(2048) || Divisor.slt(-2048))
22785     return SDValue();
22786   return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22787 }
22788 
22789 bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22790     EVT VT, const APInt &AndMask) const {
22791   if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22792     return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22793   return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
22794 }
22795 
22796 unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22797   return Subtarget.getMinimumJumpTableEntries();
22798 }
22799 
22800 SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
22801                                                     SDValue Value, SDValue Addr,
22802                                                     int JTI,
22803                                                     SelectionDAG &DAG) const {
22804   if (Subtarget.hasStdExtZicfilp()) {
22805     // When Zicfilp is enabled, we need to use a software-guarded branch for
22806     // the jump table branch.
22807     SDValue Chain = Value;
22808     // Jump table debug info is only needed if CodeView is enabled.
22809     if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
22810       Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22811     return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
22812   }
22813   return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22814 }
22815 
22816 // If an output pattern produces multiple instructions, tablegen may pick an
22817 // arbitrary type from an instruction's destination register class to use for the
22818 // VT of that MachineSDNode. This VT may be used to look up the representative
22819 // register class. If the type isn't legal, the default implementation will
22820 // not find a register class.
22821 //
22822 // Some integer types smaller than XLen are listed in the GPR register class to
22823 // support isel patterns for GISel, but are not legal in SelectionDAG. The
22824 // arbitrary type tablegen picks may be one of these smaller types.
22825 //
22826 // f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22827 // possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22828 std::pair<const TargetRegisterClass *, uint8_t>
22829 RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
22830                                              MVT VT) const {
22831   switch (VT.SimpleTy) {
22832   default:
22833     break;
22834   case MVT::i8:
22835   case MVT::i16:
22836   case MVT::i32:
22837     return TargetLowering::findRepresentativeClass(TRI, Subtarget.getXLenVT());
22838   case MVT::bf16:
22839   case MVT::f16:
22840     return TargetLowering::findRepresentativeClass(TRI, MVT::f32);
22841   }
22842 
22843   return TargetLowering::findRepresentativeClass(TRI, VT);
22844 }
22845 
22846 namespace llvm::RISCVVIntrinsicsTable {
22847 
22848 #define GET_RISCVVIntrinsicsTable_IMPL
22849 #include "RISCVGenSearchableTables.inc"
22850 
22851 } // namespace llvm::RISCVVIntrinsicsTable
22852 
22853 bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
22854 
22855   // If the function specifically requests inline stack probes, emit them.
22856   if (MF.getFunction().hasFnAttribute("probe-stack"))
22857     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22858            "inline-asm";
22859 
22860   return false;
22861 }
22862 
22863 unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
22864                                                 Align StackAlign) const {
22865   // The default stack probe size is 4096 if the function has no
22866   // stack-probe-size attribute.
22867   const Function &Fn = MF.getFunction();
22868   unsigned StackProbeSize =
22869       Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22870   // Round down to the stack alignment.
22871   StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
22872   return StackProbeSize ? StackProbeSize : StackAlign.value();
22873 }
22874 
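// When inline stack probing is requested, lower DYNAMIC_STACKALLOC by
// computing (and aligning) the new SP value in a GPR and emitting
// PROBED_ALLOCA, which emitDynamicProbedAlloc later expands into a probing
// loop.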
22875 SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
22876                                                      SelectionDAG &DAG) const {
22877   MachineFunction &MF = DAG.getMachineFunction();
22878   if (!hasInlineStackProbe(MF))
22879     return SDValue();
22880 
22881   MVT XLenVT = Subtarget.getXLenVT();
22882   // Get the inputs.
22883   SDValue Chain = Op.getOperand(0);
22884   SDValue Size = Op.getOperand(1);
22885 
22886   MaybeAlign Align =
22887       cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
22888   SDLoc dl(Op);
22889   EVT VT = Op.getValueType();
22890 
22891   // Construct the new SP value in a GPR.
22892   SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
22893   Chain = SP.getValue(1);
22894   SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
22895   if (Align)
22896     SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
22897                      DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
22898 
22899   // Set the real SP to the new value with a probing loop.
22900   Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
22901   return DAG.getMergeValues({SP, Chain}, dl);
22902 }
22903 
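// Expand PROBED_ALLOCA: decrement SP one ProbeSize chunk at a time, touching
// the stack after each step, until SP reaches the target value. Roughly:
//
//   loop:
//     sub    sp, sp, <ProbeSize>
//     s[d|w] zero, 0(sp)
//     blt    <target>, sp, loop
//   exit:
//     mv     sp, <target>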
22904 MachineBasicBlock *
22905 RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
22906                                             MachineBasicBlock *MBB) const {
22907   MachineFunction &MF = *MBB->getParent();
22908   MachineBasicBlock::iterator MBBI = MI.getIterator();
22909   DebugLoc DL = MBB->findDebugLoc(MBBI);
22910   Register TargetReg = MI.getOperand(1).getReg();
22911 
22912   const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
22913   bool IsRV64 = Subtarget.is64Bit();
22914   Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
22915   const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
22916   uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
22917 
22918   MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
22919   MachineBasicBlock *LoopTestMBB =
22920       MF.CreateMachineBasicBlock(MBB->getBasicBlock());
22921   MF.insert(MBBInsertPoint, LoopTestMBB);
22922   MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
22923   MF.insert(MBBInsertPoint, ExitMBB);
22924   Register SPReg = RISCV::X2;
22925   Register ScratchReg =
22926       MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
22927 
22928   // ScratchReg = ProbeSize
22929   TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
22930 
22931   // LoopTest:
22932   //   SUB SP, SP, ProbeSize
22933   BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
22934       .addReg(SPReg)
22935       .addReg(ScratchReg);
22936 
22937   //   s[d|w] zero, 0(sp)
22938   BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
22939           TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
22940       .addReg(RISCV::X0)
22941       .addReg(SPReg)
22942       .addImm(0);
22943 
22944   //  BLT TargetReg, SP, LoopTest
22945   BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
22946       .addReg(TargetReg)
22947       .addReg(SPReg)
22948       .addMBB(LoopTestMBB);
22949 
22950   // Adjust with: MV SP, TargetReg.
22951   BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
22952       .addReg(TargetReg)
22953       .addImm(0);
22954 
22955   ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
22956   ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
22957 
22958   LoopTestMBB->addSuccessor(ExitMBB);
22959   LoopTestMBB->addSuccessor(LoopTestMBB);
22960   MBB->addSuccessor(LoopTestMBB);
22961 
22962   MI.eraseFromParent();
22963   MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
22964   return ExitMBB->begin()->getParent();
22965 }
22966