//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
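// Usage note (illustrative, not part of the original source): as a standard
// cl::opt this flag can be passed to tools that parse LLVM options, e.g.:
//   llc -mtriple=loongarch64 -loongarch-check-zero-division foo.ll
// When enabled, each lowered integer division is followed by a guard that
// traps if the divisor is zero.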

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

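  // A brief reminder of the legalization actions used throughout this
  // constructor (generic SelectionDAG behaviour, noted here for convenience):
  // Legal means the operation is natively selectable, Expand rewrites it in
  // terms of other operations or a libcall, Promote performs it in a wider
  // type, and Custom routes it to LowerOperation()/ReplaceNodeResults().
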
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                       Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
    setMaxAtomicSizeInBitsSupported(128);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
  }
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  }
  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, MVT::i64));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
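    // The per-64-bit BITREVERSE above also reversed the order of the narrower
    // elements inside each 64-bit chunk, so build a shuffle that swaps them
    // back. For example, for v8i16 (OrigEltNum = 8, NewEltNum = 2) the mask
    // produced below is <3, 2, 1, 0, 7, 6, 5, 4>.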
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
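/// For example (illustrative), fitsRegularPattern<int>(Begin, 2, End, 0, 2)
/// checks that every second element starting at Begin forms the sequence
/// <0, 2, 4, ...>, with -1 (undef) entries treated as matching anything.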
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
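///
/// For example (illustrative), a v4i32 mask of <1, -1, 1, 1> fits this form
/// and can be selected to a single vreplvei.w with immediate 1.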
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
///   (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
///   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  // When the size is less than 4, lower-cost instructions may be used.
  if (Mask.size() < 4)
    return SDValue();

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Mask.size(); j += 4) {
      int Idx = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = Idx;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (Idx != -1 && Idx != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  APInt Imm(64, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SubMask[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, MVT::i64));
}

/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 2, 2, 4, 4, ...>
///   <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 1, 3, 3, 5, 5, ...>
///   <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVH (if possible).
///
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVH when the mask consists of two of the
/// following forms interleaved:
///   <x, x+1, x+2, ...>
///   <n+x, n+x+1, n+x+2, ...>
/// where n is the number of elements in the vector and x is half n.
/// For example:
///   <x, x, x+1, x+1, x+2, x+2, ...>
///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
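///
/// For example (illustrative), for v8i16 (n = 8, x = 4) the two-operand mask
/// <4, 12, 5, 13, 6, 14, 7, 15> fits the second form and lowers to VILVH.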
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVL (if possible).
///
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVL when the mask consists of two of the
/// following forms interleaved:
///   <0, 1, 2, ...>
///   <n, n+1, n+2, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 1, 1, 2, 2, ...>
///   <0, n, 1, n+1, 2, n+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
///
/// VPICKEV copies the even elements of each vector into the result vector.
///
/// It is possible to lower into VPICKEV when the mask consists of two of the
/// following forms concatenated:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 2, 4, ..., 0, 2, 4, ...>
///   <0, 2, 4, ..., n, n+2, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
///
/// VPICKOD copies the odd elements of each vector into the result vector.
///
/// It is possible to lower into VPICKOD when the mask consists of two of the
/// following forms concatenated:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 3, 5, ..., 1, 3, 5, ...>
///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  SmallVector<SDValue, 16> Ops;
  for (auto M : Mask)
    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHUF concatenates the vectors in a bitwise fashion:
  // <0b00, 0b01> + <0b10, 0b11> ->
  // 0b0100       + 0b1110       -> 0b01001110
  //                                <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
         "Vector type is unsupported for lsx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This commented-out code may be enabled in the future to better
    // match the pattern for instruction selection.
    /* V2 = V1; */
  }

  // For better performance, it is recommended not to change the order of the
  // pattern comparisons below.
  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
///
/// It is an XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x is equal to n and n is half the length of the vector.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
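///
/// For example (illustrative), for v8i32 a mask of <1, 1, 1, 1, 5, 5, 5, 5>
/// fits this form (x = 1, n = 4) and lowers to a replication with immediate 1.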
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
                                             ArrayRef<int> Mask, MVT VT,
                                             SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
                              0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  // When the size is less than or equal to 4, lower-cost instructions may be
  // used.
  if (Mask.size() <= 4)
    return SDValue();
  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  unsigned LeftSize = HalfSize / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
                              1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
                                   1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
                                   2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
                                   2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + HalfSize;
  const auto &End = Mask.end();

  // VECTOR_SHUFFLE concatenates the vectors:
  //  <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
  //  shuffling ->
  //  <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
  //
  // XVSHUF concatenates the vectors:
  //  <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
  //  shuffling ->
  //  <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
  SmallVector<SDValue, 8> MaskAlloc;
  for (auto it = Begin; it < Mid; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it < MaskSize + HalfSize)) {
      int M = *it < HalfSize ? *it : *it - HalfSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");

  for (auto it = Mid; it < End; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= HalfSize && *it < MaskSize) ||
             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, except for the following four cases, other cases are regarded
/// as cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case is the closest to LoongArch instructions and the other
/// cases need to be converted to it for processing.
///
/// This function may modify V1, V2 and Mask.
static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
                                            MutableArrayRef<int> Mask, MVT VT,
                                            SDValue &V1, SDValue &V2,
                                            SelectionDAG &DAG) {

  enum HalfMaskType { HighLaneTy, LowLaneTy, None };

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;

  HalfMaskType preMask = None, postMask = None;

  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    preMask = HighLaneTy;
  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    preMask = LowLaneTy;

  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    postMask = HighLaneTy;
  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    postMask = LowLaneTy;

  // The pre-half of mask is high lane type, and the post-half of mask
  // is low lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of the mask corresponds
  // to the lower 128 bits of the vector register, and the low lane of the mask
  // corresponds to the higher 128 bits of the vector register.
  if (preMask == HighLaneTy && postMask == LowLaneTy) {
    return;
  }
  if (preMask == LowLaneTy && postMask == HighLaneTy) {
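    // Swap the two 128-bit halves of each operand (with xvpermi.d semantics,
    // each 2-bit immediate field selects one 64-bit element, so 0b01001110
    // yields <2, 3, 0, 1>), then rewrite the mask to match.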
1286     V1 = DAG.getBitcast(MVT::v4i64, V1);
1287     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1288                      DAG.getConstant(0b01001110, DL, MVT::i64));
1289     V1 = DAG.getBitcast(VT, V1);
1290 
1291     if (!V2.isUndef()) {
1292       V2 = DAG.getBitcast(MVT::v4i64, V2);
1293       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1294                        DAG.getConstant(0b01001110, DL, MVT::i64));
1295       V2 = DAG.getBitcast(VT, V2);
1296     }
1297 
1298     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1299       *it = *it < 0 ? *it : *it - HalfSize;
1300     }
1301     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1302       *it = *it < 0 ? *it : *it + HalfSize;
1303     }
1304   } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1305     V1 = DAG.getBitcast(MVT::v4i64, V1);
1306     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1307                      DAG.getConstant(0b11101110, DL, MVT::i64));
1308     V1 = DAG.getBitcast(VT, V1);
1309 
1310     if (!V2.isUndef()) {
1311       V2 = DAG.getBitcast(MVT::v4i64, V2);
1312       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1313                        DAG.getConstant(0b11101110, DL, MVT::i64));
1314       V2 = DAG.getBitcast(VT, V2);
1315     }
1316 
1317     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1318       *it = *it < 0 ? *it : *it - HalfSize;
1319     }
1320   } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1321     V1 = DAG.getBitcast(MVT::v4i64, V1);
1322     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1323                      DAG.getConstant(0b01000100, DL, MVT::i64));
1324     V1 = DAG.getBitcast(VT, V1);
1325 
1326     if (!V2.isUndef()) {
1327       V2 = DAG.getBitcast(MVT::v4i64, V2);
1328       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1329                        DAG.getConstant(0b01000100, DL, MVT::i64));
1330       V2 = DAG.getBitcast(VT, V2);
1331     }
1332 
1333     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1334       *it = *it < 0 ? *it : *it + HalfSize;
1335     }
1336   } else { // cross-lane
1337     return;
1338   }
1339 }
1340 
1341 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1342 ///
1343 /// This routine breaks down the specific type of 256-bit shuffle and
1344 /// dispatches to the lowering routines accordingly.
1345 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1346                                   SDValue V1, SDValue V2, SelectionDAG &DAG) {
1347   assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1348           VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1349           VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1350          "Vector type is unsupported for lasx!");
1351   assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1352          "Two operands have different types!");
1353   assert(VT.getVectorNumElements() == Mask.size() &&
1354          "Unexpected mask size for shuffle!");
1355   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1356   assert(Mask.size() >= 4 && "Mask size is less than 4.");
1357 
1358   // Canonicalize non-cross-lane shuffle vectors.
1359   SmallVector<int> NewMask(Mask);
1360   canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1361 
1362   SDValue Result;
1363   // TODO: Add more comparison patterns.
1364   if (V2.isUndef()) {
1365     if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1366       return Result;
1367     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1368       return Result;
1369 
1370     // TODO: The commented-out assignment below may be enabled in the future
1371     // to better match the pattern for instruction selection.
1372     /* V2 = V1; */
1373   }
1374 
1375   // Do not change the order of these pattern comparisons; it is chosen for
1376   // better performance.
1377   if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1378     return Result;
1379   if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1380     return Result;
1381   if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1382     return Result;
1383   if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1384     return Result;
1385   if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1386     return Result;
1387   if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1388     return Result;
1389   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1390     return Result;
1391 
1392   return SDValue();
1393 }
1394 
1395 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1396                                                      SelectionDAG &DAG) const {
1397   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1398   ArrayRef<int> OrigMask = SVOp->getMask();
1399   SDValue V1 = Op.getOperand(0);
1400   SDValue V2 = Op.getOperand(1);
1401   MVT VT = Op.getSimpleValueType();
1402   int NumElements = VT.getVectorNumElements();
1403   SDLoc DL(Op);
1404 
1405   bool V1IsUndef = V1.isUndef();
1406   bool V2IsUndef = V2.isUndef();
1407   if (V1IsUndef && V2IsUndef)
1408     return DAG.getUNDEF(VT);
1409 
1410   // When we create a shuffle node we put the UNDEF node as the second
1411   // operand, but in some cases the first operand may be transformed to
1412   // UNDEF. In this case we should just commute the node.
1413   if (V1IsUndef)
1414     return DAG.getCommutedVectorShuffle(*SVOp);
1415 
1416   // Check for non-undef masks pointing at an undef vector and make the masks
1417   // undef as well. This makes it easier to match the shuffle based solely on
1418   // the mask.
1419   if (V2IsUndef &&
1420       any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1421     SmallVector<int, 8> NewMask(OrigMask);
1422     for (int &M : NewMask)
1423       if (M >= NumElements)
1424         M = -1;
1425     return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1426   }
1427 
1428   // Check for illegal shuffle mask element index values.
1429   int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1430   (void)MaskUpperLimit;
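       // MaskUpperLimit is only referenced by the assert below; the (void)
       // cast avoids an unused-variable warning in builds without assertions.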
1431   assert(llvm::all_of(OrigMask,
1432                       [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1433          "Out of bounds shuffle index");
1434 
1435   // For each vector width, delegate to a specialized lowering routine.
1436   if (VT.is128BitVector())
1437     return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1438 
1439   if (VT.is256BitVector())
1440     return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1441 
1442   return SDValue();
1443 }
1444 
1445 static bool isConstantOrUndef(const SDValue Op) {
1446   if (Op->isUndef())
1447     return true;
1448   if (isa<ConstantSDNode>(Op))
1449     return true;
1450   if (isa<ConstantFPSDNode>(Op))
1451     return true;
1452   return false;
1453 }
1454 
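     // Note the "any" semantics: this returns true when at least one operand
     // is a constant or undef, i.e. the BUILD_VECTOR is at least partially
     // constant; a fully variable BUILD_VECTOR returns false.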
1455 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1456   for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1457     if (isConstantOrUndef(Op->getOperand(i)))
1458       return true;
1459   return false;
1460 }
1461 
1462 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1463                                                    SelectionDAG &DAG) const {
1464   BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1465   EVT ResTy = Op->getValueType(0);
1466   SDLoc DL(Op);
1467   APInt SplatValue, SplatUndef;
1468   unsigned SplatBitSize;
1469   bool HasAnyUndefs;
1470   bool Is128Vec = ResTy.is128BitVector();
1471   bool Is256Vec = ResTy.is256BitVector();
1472 
1473   if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1474       (!Subtarget.hasExtLASX() || !Is256Vec))
1475     return SDValue();
1476 
1477   if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1478                             /*MinSplatBits=*/8) &&
1479       SplatBitSize <= 64) {
1480     // We can only cope with 8, 16, 32, or 64-bit elements.
1481     if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1482         SplatBitSize != 64)
1483       return SDValue();
1484 
1485     EVT ViaVecTy;
1486 
1487     switch (SplatBitSize) {
1488     default:
1489       return SDValue();
1490     case 8:
1491       ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1492       break;
1493     case 16:
1494       ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1495       break;
1496     case 32:
1497       ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1498       break;
1499     case 64:
1500       ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1501       break;
1502     }
1503 
1504     // SelectionDAG::getConstant will promote SplatValue appropriately.
1505     SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1506 
1507     // Bitcast to the type we originally wanted.
1508     if (ViaVecTy != ResTy)
1509       Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1510 
1511     return Result;
1512   }
1513 
1514   if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1515     return Op;
1516 
1517   if (!isConstantOrUndefBUILD_VECTOR(Node)) {
1518     // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1519     // The resulting code is the same length as the expansion, but it doesn't
1520     // use memory operations.
1521     EVT ResTy = Node->getValueType(0);
1522 
1523     assert(ResTy.isVector());
1524 
1525     unsigned NumElts = ResTy.getVectorNumElements();
1526     SDValue Vector = DAG.getUNDEF(ResTy);
1527     for (unsigned i = 0; i < NumElts; ++i) {
1528       Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1529                            Node->getOperand(i),
1530                            DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1531     }
1532     return Vector;
1533   }
1534 
1535   return SDValue();
1536 }
1537 
1538 SDValue
1539 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1540                                                  SelectionDAG &DAG) const {
1541   EVT VecTy = Op->getOperand(0)->getValueType(0);
1542   SDValue Idx = Op->getOperand(1);
1543   EVT EltTy = VecTy.getVectorElementType();
1544   unsigned NumElts = VecTy.getVectorNumElements();
1545 
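       // A constant-index extract is kept as-is when the element is 32/64-bit
       // wide, or, for narrower elements, when the index falls in the low
       // half (presumably the part reachable via the 128-bit vpickve2gr).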
1546   if (isa<ConstantSDNode>(Idx) &&
1547       (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1548        EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1549     return Op;
1550 
1551   return SDValue();
1552 }
1553 
1554 SDValue
1555 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1556                                                 SelectionDAG &DAG) const {
1557   if (isa<ConstantSDNode>(Op->getOperand(2)))
1558     return Op;
1559   return SDValue();
1560 }
1561 
1562 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1563                                                    SelectionDAG &DAG) const {
1564   SDLoc DL(Op);
1565   SyncScope::ID FenceSSID =
1566       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1567 
1568   // SingleThread fences only synchronize with signal handlers on the same
1569   // thread and thus only need to preserve instruction order, not actually
1570   // enforce memory ordering.
1571   if (FenceSSID == SyncScope::SingleThread)
1572     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1573     return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1574 
1575   return Op;
1576 }
1577 
1578 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1579                                                      SelectionDAG &DAG) const {
1580 
1581   if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1582     DAG.getContext()->emitError(
1583         "On LA64, only 64-bit registers can be written.");
1584     return Op.getOperand(0);
1585   }
1586 
1587   if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1588     DAG.getContext()->emitError(
1589         "On LA32, only 32-bit registers can be written.");
1590     return Op.getOperand(0);
1591   }
1592 
1593   return Op;
1594 }
1595 
1596 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1597                                                 SelectionDAG &DAG) const {
1598   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1599     DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1600                                 "be a constant integer");
1601     return SDValue();
1602   }
1603 
1604   MachineFunction &MF = DAG.getMachineFunction();
1605   MF.getFrameInfo().setFrameAddressIsTaken(true);
1606   Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1607   EVT VT = Op.getValueType();
1608   SDLoc DL(Op);
1609   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1610   unsigned Depth = Op.getConstantOperandVal(0);
1611   int GRLenInBytes = Subtarget.getGRLen() / 8;
1612 
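       // Walk up the frame chain. This assumes the previous frame pointer is
       // spilled at FP - 2 * GRLenInBytes, just below the saved return
       // address.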
1613   while (Depth--) {
1614     int Offset = -(GRLenInBytes * 2);
1615     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1616                               DAG.getSignedConstant(Offset, DL, VT));
1617     FrameAddr =
1618         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1619   }
1620   return FrameAddr;
1621 }
1622 
1623 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1624                                                  SelectionDAG &DAG) const {
1625   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1626     return SDValue();
1627 
1628   // Currently only the return address of the current frame is supported.
1629   if (Op.getConstantOperandVal(0) != 0) {
1630     DAG.getContext()->emitError(
1631         "return address can only be determined for the current frame");
1632     return SDValue();
1633   }
1634 
1635   MachineFunction &MF = DAG.getMachineFunction();
1636   MF.getFrameInfo().setReturnAddressIsTaken(true);
1637   MVT GRLenVT = Subtarget.getGRLenVT();
1638 
1639   // Return the value of the return address register, marking it an implicit
1640   // live-in.
1641   Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1642                               getRegClassFor(GRLenVT));
1643   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1644 }
1645 
1646 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1647                                                    SelectionDAG &DAG) const {
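       // The CFA is the stack pointer value on entry to the function; model
       // it as a fixed frame object of GRLen/8 bytes at offset 0.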
1648   MachineFunction &MF = DAG.getMachineFunction();
1649   auto Size = Subtarget.getGRLen() / 8;
1650   auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1651   return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1652 }
1653 
1654 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1655                                               SelectionDAG &DAG) const {
1656   MachineFunction &MF = DAG.getMachineFunction();
1657   auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1658 
1659   SDLoc DL(Op);
1660   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1661                                  getPointerTy(MF.getDataLayout()));
1662 
1663   // vastart just stores the address of the VarArgsFrameIndex slot into the
1664   // memory location argument.
1665   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1666   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1667                       MachinePointerInfo(SV));
1668 }
1669 
1670 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1671                                                  SelectionDAG &DAG) const {
1672   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1673          !Subtarget.hasBasicD() && "unexpected target features");
1674 
1675   SDLoc DL(Op);
1676   SDValue Op0 = Op.getOperand(0);
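       // These early-outs keep the node when the operand comes from an AND,
       // BSTRPICK or AssertZext that bounds its value; such cases are handled
       // by later patterns instead of the soft-float libcall below.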
1677   if (Op0->getOpcode() == ISD::AND) {
1678     auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1679     if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1680       return Op;
1681   }
1682 
1683   if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1684       Op0.getConstantOperandVal(1) < UINT64_C(0x1F) &&
1685       Op0.getConstantOperandVal(2) == UINT64_C(0))
1686     return Op;
1687 
1688   if (Op0.getOpcode() == ISD::AssertZext &&
1689       cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1690     return Op;
1691 
1692   EVT OpVT = Op0.getValueType();
1693   EVT RetVT = Op.getValueType();
1694   RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1695   MakeLibCallOptions CallOptions;
1696   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1697   SDValue Chain = SDValue();
1698   SDValue Result;
1699   std::tie(Result, Chain) =
1700       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1701   return Result;
1702 }
1703 
1704 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1705                                                  SelectionDAG &DAG) const {
1706   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1707          !Subtarget.hasBasicD() && "unexpected target features");
1708 
1709   SDLoc DL(Op);
1710   SDValue Op0 = Op.getOperand(0);
1711 
1712   if ((Op0.getOpcode() == ISD::AssertSext ||
1713        Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
1714       cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1715     return Op;
1716 
1717   EVT OpVT = Op0.getValueType();
1718   EVT RetVT = Op.getValueType();
1719   RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1720   MakeLibCallOptions CallOptions;
1721   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1722   SDValue Chain = SDValue();
1723   SDValue Result;
1724   std::tie(Result, Chain) =
1725       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1726   return Result;
1727 }
1728 
1729 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1730                                               SelectionDAG &DAG) const {
1731 
1732   SDLoc DL(Op);
1733   SDValue Op0 = Op.getOperand(0);
1734 
1735   if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1736       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
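         // i32 is not a legal type on LA64, so widen through a 64-bit GPR;
         // MOVGR2FR_W_LA64 then moves the low 32 bits of it into the FPR.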
1737     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1738     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1739   }
1740   return Op;
1741 }
1742 
1743 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1744                                                  SelectionDAG &DAG) const {
1745 
1746   SDLoc DL(Op);
1747   SDValue Op0 = Op.getOperand(0);
1748 
1749   if (Op0.getValueType() == MVT::f16)
1750     Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
1751 
1752   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1753       !Subtarget.hasBasicD()) {
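         // Without the D extension the result cannot round-trip through an
         // f64 bitcast; convert in a 32-bit FPR and move the bits to a GPR.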
1754     SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
1755     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1756   }
1757 
1758   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1759   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
1760   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1761 }
1762 
1763 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1764                              SelectionDAG &DAG, unsigned Flags) {
1765   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1766 }
1767 
1768 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1769                              SelectionDAG &DAG, unsigned Flags) {
1770   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1771                                    Flags);
1772 }
1773 
1774 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1775                              SelectionDAG &DAG, unsigned Flags) {
1776   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1777                                    N->getOffset(), Flags);
1778 }
1779 
1780 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1781                              SelectionDAG &DAG, unsigned Flags) {
1782   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1783 }
1784 
1785 template <class NodeTy>
1786 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1787                                          CodeModel::Model M,
1788                                          bool IsLocal) const {
1789   SDLoc DL(N);
1790   EVT Ty = getPointerTy(DAG.getDataLayout());
1791   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1792   SDValue Load;
1793 
1794   switch (M) {
1795   default:
1796     report_fatal_error("Unsupported code model");
1797 
1798   case CodeModel::Large: {
1799     assert(Subtarget.is64Bit() && "Large code model requires LA64");
1800 
1801     // This is not actually used, but is necessary for successfully matching
1802     // the PseudoLA_*_LARGE nodes.
1803     SDValue Tmp = DAG.getConstant(0, DL, Ty);
1804     if (IsLocal) {
1805       // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1806       // eventually becomes the desired 5-insn code sequence.
1807       Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1808                                         Tmp, Addr),
1809                      0);
1810     } else {
1811       // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1812       // eventually becomes the desired 5-insn code sequence.
1813       Load = SDValue(
1814           DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1815           0);
1816     }
1817     break;
1818   }
1819 
1820   case CodeModel::Small:
1821   case CodeModel::Medium:
1822     if (IsLocal) {
1823       // This generates the pattern (PseudoLA_PCREL sym), which expands to
1824       // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1825       Load = SDValue(
1826           DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1827     } else {
1828       // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1829       // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1830       Load =
1831           SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1832     }
1833   }
1834 
1835   if (!IsLocal) {
1836     // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1837     MachineFunction &MF = DAG.getMachineFunction();
1838     MachineMemOperand *MemOp = MF.getMachineMemOperand(
1839         MachinePointerInfo::getGOT(MF),
1840         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1841             MachineMemOperand::MOInvariant,
1842         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1843     DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1844   }
1845 
1846   return Load;
1847 }
1848 
1849 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1850                                                    SelectionDAG &DAG) const {
1851   return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1852                  DAG.getTarget().getCodeModel());
1853 }
1854 
1855 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1856                                                 SelectionDAG &DAG) const {
1857   return getAddr(cast<JumpTableSDNode>(Op), DAG,
1858                  DAG.getTarget().getCodeModel());
1859 }
1860 
1861 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1862                                                    SelectionDAG &DAG) const {
1863   return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1864                  DAG.getTarget().getCodeModel());
1865 }
1866 
1867 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1868                                                     SelectionDAG &DAG) const {
1869   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1870   assert(N->getOffset() == 0 && "unexpected offset in global node");
1871   auto CM = DAG.getTarget().getCodeModel();
1872   const GlobalValue *GV = N->getGlobal();
1873 
1874   if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1875     if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1876       CM = *GCM;
1877   }
1878 
1879   return getAddr(N, DAG, CM, GV->isDSOLocal());
1880 }
1881 
1882 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1883                                                   SelectionDAG &DAG,
1884                                                   unsigned Opc, bool UseGOT,
1885                                                   bool Large) const {
1886   SDLoc DL(N);
1887   EVT Ty = getPointerTy(DAG.getDataLayout());
1888   MVT GRLenVT = Subtarget.getGRLenVT();
1889 
1890   // This is not actually used, but is necessary for successfully matching the
1891   // PseudoLA_*_LARGE nodes.
1892   SDValue Tmp = DAG.getConstant(0, DL, Ty);
1893   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1894 
1895   // Only IE needs an extra argument for large code model.
1896   SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
1897                        ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1898                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1899 
1900   // For LE under the normal/medium code models, the thread-pointer add is
1901   // emitted during the pseudo-instruction expansion.
1902   if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
1903     return Offset;
1904 
1905   if (UseGOT) {
1906     // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1907     MachineFunction &MF = DAG.getMachineFunction();
1908     MachineMemOperand *MemOp = MF.getMachineMemOperand(
1909         MachinePointerInfo::getGOT(MF),
1910         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1911             MachineMemOperand::MOInvariant,
1912         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1913     DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1914   }
1915 
1916   // Add the thread pointer.
1917   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1918                      DAG.getRegister(LoongArch::R2, GRLenVT));
1919 }
1920 
1921 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1922                                                    SelectionDAG &DAG,
1923                                                    unsigned Opc,
1924                                                    bool Large) const {
1925   SDLoc DL(N);
1926   EVT Ty = getPointerTy(DAG.getDataLayout());
1927   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1928 
1929   // This is not actually used, but is necessary for successfully matching the
1930   // PseudoLA_*_LARGE nodes.
1931   SDValue Tmp = DAG.getConstant(0, DL, Ty);
1932 
1933   // Use a PC-relative addressing mode to access the dynamic GOT address.
1934   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1935   SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1936                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1937 
1938   // Prepare argument list to generate call.
1939   ArgListTy Args;
1940   ArgListEntry Entry;
1941   Entry.Node = Load;
1942   Entry.Ty = CallTy;
1943   Args.push_back(Entry);
1944 
1945   // Setup call to __tls_get_addr.
1946   TargetLowering::CallLoweringInfo CLI(DAG);
1947   CLI.setDebugLoc(DL)
1948       .setChain(DAG.getEntryNode())
1949       .setLibCallee(CallingConv::C, CallTy,
1950                     DAG.getExternalSymbol("__tls_get_addr", Ty),
1951                     std::move(Args));
1952 
1953   return LowerCallTo(CLI).first;
1954 }
1955 
1956 SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1957                                                 SelectionDAG &DAG, unsigned Opc,
1958                                                 bool Large) const {
1959   SDLoc DL(N);
1960   EVT Ty = getPointerTy(DAG.getDataLayout());
1961   const GlobalValue *GV = N->getGlobal();
1962 
1963   // This is not actually used, but is necessary for successfully matching the
1964   // PseudoLA_*_LARGE nodes.
1965   SDValue Tmp = DAG.getConstant(0, DL, Ty);
1966 
1967   // Use a PC-relative addressing mode to access the global dynamic GOT address.
1968   // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1969   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1970   return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1971                : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1972 }
1973 
1974 SDValue
1975 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1976                                                SelectionDAG &DAG) const {
1977   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1978       CallingConv::GHC)
1979     report_fatal_error("In GHC calling convention TLS is not supported");
1980 
1981   bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1982   assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1983 
1984   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1985   assert(N->getOffset() == 0 && "unexpected offset in global node");
1986 
1987   if (DAG.getTarget().useEmulatedTLS())
1988     report_fatal_error("the emulated TLS is prohibited",
1989                        /*GenCrashDiag=*/false);
1990 
1991   bool IsDesc = DAG.getTarget().useTLSDESC();
1992 
1993   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1994   case TLSModel::GeneralDynamic:
1995     // In this model, application code calls the dynamic linker function
1996     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1997     // runtime.
1998     if (!IsDesc)
1999       return getDynamicTLSAddr(N, DAG,
2000                                Large ? LoongArch::PseudoLA_TLS_GD_LARGE
2001                                      : LoongArch::PseudoLA_TLS_GD,
2002                                Large);
2003     break;
2004   case TLSModel::LocalDynamic:
2005     // Same as GeneralDynamic, except for assembly modifiers and relocation
2006     // records.
2007     if (!IsDesc)
2008       return getDynamicTLSAddr(N, DAG,
2009                                Large ? LoongArch::PseudoLA_TLS_LD_LARGE
2010                                      : LoongArch::PseudoLA_TLS_LD,
2011                                Large);
2012     break;
2013   case TLSModel::InitialExec:
2014     // This model uses the GOT to resolve TLS offsets.
2015     return getStaticTLSAddr(N, DAG,
2016                             Large ? LoongArch::PseudoLA_TLS_IE_LARGE
2017                                   : LoongArch::PseudoLA_TLS_IE,
2018                             /*UseGOT=*/true, Large);
2019   case TLSModel::LocalExec:
2020     // This model is used for statically linked programs, as the TLS
2021     // offsets are resolved at link time.
2022     //
2023     // This node doesn't need an extra argument for the large code model.
2024     return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
2025                             /*UseGOT=*/false, Large);
2026   }
2027 
2028   return getTLSDescAddr(N, DAG,
2029                         Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
2030                               : LoongArch::PseudoLA_TLS_DESC,
2031                         Large);
2032 }
2033 
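     // Check that operand ImmOp of the intrinsic is an immediate fitting in N
     // bits (signed if IsSigned). On failure, emit a diagnostic and return
     // UNDEF; returns SDValue() when the immediate is in range.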
2034 template <unsigned N>
2035 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2036                                     SelectionDAG &DAG, bool IsSigned = false) {
2037   auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2038   // Check the ImmArg.
2039   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2040       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2041     DAG.getContext()->emitError(Op->getOperationName(0) +
2042                                 ": argument out of range.");
2043     return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2044   }
2045   return SDValue();
2046 }
2047 
2048 SDValue
2049 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2050                                                  SelectionDAG &DAG) const {
2051   SDLoc DL(Op);
2052   switch (Op.getConstantOperandVal(0)) {
2053   default:
2054     return SDValue(); // Don't custom lower most intrinsics.
2055   case Intrinsic::thread_pointer: {
2056     EVT PtrVT = getPointerTy(DAG.getDataLayout());
2057     return DAG.getRegister(LoongArch::R2, PtrVT);
2058   }
2059   case Intrinsic::loongarch_lsx_vpickve2gr_d:
2060   case Intrinsic::loongarch_lsx_vpickve2gr_du:
2061   case Intrinsic::loongarch_lsx_vreplvei_d:
2062   case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2063     return checkIntrinsicImmArg<1>(Op, 2, DAG);
2064   case Intrinsic::loongarch_lsx_vreplvei_w:
2065   case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2066   case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2067   case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2068   case Intrinsic::loongarch_lasx_xvpickve_d:
2069   case Intrinsic::loongarch_lasx_xvpickve_d_f:
2070     return checkIntrinsicImmArg<2>(Op, 2, DAG);
2071   case Intrinsic::loongarch_lasx_xvinsve0_d:
2072     return checkIntrinsicImmArg<2>(Op, 3, DAG);
2073   case Intrinsic::loongarch_lsx_vsat_b:
2074   case Intrinsic::loongarch_lsx_vsat_bu:
2075   case Intrinsic::loongarch_lsx_vrotri_b:
2076   case Intrinsic::loongarch_lsx_vsllwil_h_b:
2077   case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2078   case Intrinsic::loongarch_lsx_vsrlri_b:
2079   case Intrinsic::loongarch_lsx_vsrari_b:
2080   case Intrinsic::loongarch_lsx_vreplvei_h:
2081   case Intrinsic::loongarch_lasx_xvsat_b:
2082   case Intrinsic::loongarch_lasx_xvsat_bu:
2083   case Intrinsic::loongarch_lasx_xvrotri_b:
2084   case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2085   case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2086   case Intrinsic::loongarch_lasx_xvsrlri_b:
2087   case Intrinsic::loongarch_lasx_xvsrari_b:
2088   case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2089   case Intrinsic::loongarch_lasx_xvpickve_w:
2090   case Intrinsic::loongarch_lasx_xvpickve_w_f:
2091     return checkIntrinsicImmArg<3>(Op, 2, DAG);
2092   case Intrinsic::loongarch_lasx_xvinsve0_w:
2093     return checkIntrinsicImmArg<3>(Op, 3, DAG);
2094   case Intrinsic::loongarch_lsx_vsat_h:
2095   case Intrinsic::loongarch_lsx_vsat_hu:
2096   case Intrinsic::loongarch_lsx_vrotri_h:
2097   case Intrinsic::loongarch_lsx_vsllwil_w_h:
2098   case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2099   case Intrinsic::loongarch_lsx_vsrlri_h:
2100   case Intrinsic::loongarch_lsx_vsrari_h:
2101   case Intrinsic::loongarch_lsx_vreplvei_b:
2102   case Intrinsic::loongarch_lasx_xvsat_h:
2103   case Intrinsic::loongarch_lasx_xvsat_hu:
2104   case Intrinsic::loongarch_lasx_xvrotri_h:
2105   case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2106   case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2107   case Intrinsic::loongarch_lasx_xvsrlri_h:
2108   case Intrinsic::loongarch_lasx_xvsrari_h:
2109   case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2110     return checkIntrinsicImmArg<4>(Op, 2, DAG);
2111   case Intrinsic::loongarch_lsx_vsrlni_b_h:
2112   case Intrinsic::loongarch_lsx_vsrani_b_h:
2113   case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2114   case Intrinsic::loongarch_lsx_vsrarni_b_h:
2115   case Intrinsic::loongarch_lsx_vssrlni_b_h:
2116   case Intrinsic::loongarch_lsx_vssrani_b_h:
2117   case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2118   case Intrinsic::loongarch_lsx_vssrani_bu_h:
2119   case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2120   case Intrinsic::loongarch_lsx_vssrarni_b_h:
2121   case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2122   case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2123   case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2124   case Intrinsic::loongarch_lasx_xvsrani_b_h:
2125   case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2126   case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2127   case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2128   case Intrinsic::loongarch_lasx_xvssrani_b_h:
2129   case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2130   case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2131   case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2132   case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2133   case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2134   case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2135     return checkIntrinsicImmArg<4>(Op, 3, DAG);
2136   case Intrinsic::loongarch_lsx_vsat_w:
2137   case Intrinsic::loongarch_lsx_vsat_wu:
2138   case Intrinsic::loongarch_lsx_vrotri_w:
2139   case Intrinsic::loongarch_lsx_vsllwil_d_w:
2140   case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2141   case Intrinsic::loongarch_lsx_vsrlri_w:
2142   case Intrinsic::loongarch_lsx_vsrari_w:
2143   case Intrinsic::loongarch_lsx_vslei_bu:
2144   case Intrinsic::loongarch_lsx_vslei_hu:
2145   case Intrinsic::loongarch_lsx_vslei_wu:
2146   case Intrinsic::loongarch_lsx_vslei_du:
2147   case Intrinsic::loongarch_lsx_vslti_bu:
2148   case Intrinsic::loongarch_lsx_vslti_hu:
2149   case Intrinsic::loongarch_lsx_vslti_wu:
2150   case Intrinsic::loongarch_lsx_vslti_du:
2151   case Intrinsic::loongarch_lsx_vbsll_v:
2152   case Intrinsic::loongarch_lsx_vbsrl_v:
2153   case Intrinsic::loongarch_lasx_xvsat_w:
2154   case Intrinsic::loongarch_lasx_xvsat_wu:
2155   case Intrinsic::loongarch_lasx_xvrotri_w:
2156   case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2157   case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2158   case Intrinsic::loongarch_lasx_xvsrlri_w:
2159   case Intrinsic::loongarch_lasx_xvsrari_w:
2160   case Intrinsic::loongarch_lasx_xvslei_bu:
2161   case Intrinsic::loongarch_lasx_xvslei_hu:
2162   case Intrinsic::loongarch_lasx_xvslei_wu:
2163   case Intrinsic::loongarch_lasx_xvslei_du:
2164   case Intrinsic::loongarch_lasx_xvslti_bu:
2165   case Intrinsic::loongarch_lasx_xvslti_hu:
2166   case Intrinsic::loongarch_lasx_xvslti_wu:
2167   case Intrinsic::loongarch_lasx_xvslti_du:
2168   case Intrinsic::loongarch_lasx_xvbsll_v:
2169   case Intrinsic::loongarch_lasx_xvbsrl_v:
2170     return checkIntrinsicImmArg<5>(Op, 2, DAG);
2171   case Intrinsic::loongarch_lsx_vseqi_b:
2172   case Intrinsic::loongarch_lsx_vseqi_h:
2173   case Intrinsic::loongarch_lsx_vseqi_w:
2174   case Intrinsic::loongarch_lsx_vseqi_d:
2175   case Intrinsic::loongarch_lsx_vslei_b:
2176   case Intrinsic::loongarch_lsx_vslei_h:
2177   case Intrinsic::loongarch_lsx_vslei_w:
2178   case Intrinsic::loongarch_lsx_vslei_d:
2179   case Intrinsic::loongarch_lsx_vslti_b:
2180   case Intrinsic::loongarch_lsx_vslti_h:
2181   case Intrinsic::loongarch_lsx_vslti_w:
2182   case Intrinsic::loongarch_lsx_vslti_d:
2183   case Intrinsic::loongarch_lasx_xvseqi_b:
2184   case Intrinsic::loongarch_lasx_xvseqi_h:
2185   case Intrinsic::loongarch_lasx_xvseqi_w:
2186   case Intrinsic::loongarch_lasx_xvseqi_d:
2187   case Intrinsic::loongarch_lasx_xvslei_b:
2188   case Intrinsic::loongarch_lasx_xvslei_h:
2189   case Intrinsic::loongarch_lasx_xvslei_w:
2190   case Intrinsic::loongarch_lasx_xvslei_d:
2191   case Intrinsic::loongarch_lasx_xvslti_b:
2192   case Intrinsic::loongarch_lasx_xvslti_h:
2193   case Intrinsic::loongarch_lasx_xvslti_w:
2194   case Intrinsic::loongarch_lasx_xvslti_d:
2195     return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2196   case Intrinsic::loongarch_lsx_vsrlni_h_w:
2197   case Intrinsic::loongarch_lsx_vsrani_h_w:
2198   case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2199   case Intrinsic::loongarch_lsx_vsrarni_h_w:
2200   case Intrinsic::loongarch_lsx_vssrlni_h_w:
2201   case Intrinsic::loongarch_lsx_vssrani_h_w:
2202   case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2203   case Intrinsic::loongarch_lsx_vssrani_hu_w:
2204   case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2205   case Intrinsic::loongarch_lsx_vssrarni_h_w:
2206   case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2207   case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2208   case Intrinsic::loongarch_lsx_vfrstpi_b:
2209   case Intrinsic::loongarch_lsx_vfrstpi_h:
2210   case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2211   case Intrinsic::loongarch_lasx_xvsrani_h_w:
2212   case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2213   case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2214   case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2215   case Intrinsic::loongarch_lasx_xvssrani_h_w:
2216   case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2217   case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2218   case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2219   case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2220   case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2221   case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2222   case Intrinsic::loongarch_lasx_xvfrstpi_b:
2223   case Intrinsic::loongarch_lasx_xvfrstpi_h:
2224     return checkIntrinsicImmArg<5>(Op, 3, DAG);
2225   case Intrinsic::loongarch_lsx_vsat_d:
2226   case Intrinsic::loongarch_lsx_vsat_du:
2227   case Intrinsic::loongarch_lsx_vrotri_d:
2228   case Intrinsic::loongarch_lsx_vsrlri_d:
2229   case Intrinsic::loongarch_lsx_vsrari_d:
2230   case Intrinsic::loongarch_lasx_xvsat_d:
2231   case Intrinsic::loongarch_lasx_xvsat_du:
2232   case Intrinsic::loongarch_lasx_xvrotri_d:
2233   case Intrinsic::loongarch_lasx_xvsrlri_d:
2234   case Intrinsic::loongarch_lasx_xvsrari_d:
2235     return checkIntrinsicImmArg<6>(Op, 2, DAG);
2236   case Intrinsic::loongarch_lsx_vsrlni_w_d:
2237   case Intrinsic::loongarch_lsx_vsrani_w_d:
2238   case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2239   case Intrinsic::loongarch_lsx_vsrarni_w_d:
2240   case Intrinsic::loongarch_lsx_vssrlni_w_d:
2241   case Intrinsic::loongarch_lsx_vssrani_w_d:
2242   case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2243   case Intrinsic::loongarch_lsx_vssrani_wu_d:
2244   case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2245   case Intrinsic::loongarch_lsx_vssrarni_w_d:
2246   case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2247   case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2248   case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2249   case Intrinsic::loongarch_lasx_xvsrani_w_d:
2250   case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2251   case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2252   case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2253   case Intrinsic::loongarch_lasx_xvssrani_w_d:
2254   case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2255   case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2256   case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2257   case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2258   case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2259   case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2260     return checkIntrinsicImmArg<6>(Op, 3, DAG);
2261   case Intrinsic::loongarch_lsx_vsrlni_d_q:
2262   case Intrinsic::loongarch_lsx_vsrani_d_q:
2263   case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2264   case Intrinsic::loongarch_lsx_vsrarni_d_q:
2265   case Intrinsic::loongarch_lsx_vssrlni_d_q:
2266   case Intrinsic::loongarch_lsx_vssrani_d_q:
2267   case Intrinsic::loongarch_lsx_vssrlni_du_q:
2268   case Intrinsic::loongarch_lsx_vssrani_du_q:
2269   case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2270   case Intrinsic::loongarch_lsx_vssrarni_d_q:
2271   case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2272   case Intrinsic::loongarch_lsx_vssrarni_du_q:
2273   case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2274   case Intrinsic::loongarch_lasx_xvsrani_d_q:
2275   case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2276   case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2277   case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2278   case Intrinsic::loongarch_lasx_xvssrani_d_q:
2279   case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2280   case Intrinsic::loongarch_lasx_xvssrani_du_q:
2281   case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2282   case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2283   case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2284   case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2285     return checkIntrinsicImmArg<7>(Op, 3, DAG);
2286   case Intrinsic::loongarch_lsx_vnori_b:
2287   case Intrinsic::loongarch_lsx_vshuf4i_b:
2288   case Intrinsic::loongarch_lsx_vshuf4i_h:
2289   case Intrinsic::loongarch_lsx_vshuf4i_w:
2290   case Intrinsic::loongarch_lasx_xvnori_b:
2291   case Intrinsic::loongarch_lasx_xvshuf4i_b:
2292   case Intrinsic::loongarch_lasx_xvshuf4i_h:
2293   case Intrinsic::loongarch_lasx_xvshuf4i_w:
2294   case Intrinsic::loongarch_lasx_xvpermi_d:
2295     return checkIntrinsicImmArg<8>(Op, 2, DAG);
2296   case Intrinsic::loongarch_lsx_vshuf4i_d:
2297   case Intrinsic::loongarch_lsx_vpermi_w:
2298   case Intrinsic::loongarch_lsx_vbitseli_b:
2299   case Intrinsic::loongarch_lsx_vextrins_b:
2300   case Intrinsic::loongarch_lsx_vextrins_h:
2301   case Intrinsic::loongarch_lsx_vextrins_w:
2302   case Intrinsic::loongarch_lsx_vextrins_d:
2303   case Intrinsic::loongarch_lasx_xvshuf4i_d:
2304   case Intrinsic::loongarch_lasx_xvpermi_w:
2305   case Intrinsic::loongarch_lasx_xvpermi_q:
2306   case Intrinsic::loongarch_lasx_xvbitseli_b:
2307   case Intrinsic::loongarch_lasx_xvextrins_b:
2308   case Intrinsic::loongarch_lasx_xvextrins_h:
2309   case Intrinsic::loongarch_lasx_xvextrins_w:
2310   case Intrinsic::loongarch_lasx_xvextrins_d:
2311     return checkIntrinsicImmArg<8>(Op, 3, DAG);
2312   case Intrinsic::loongarch_lsx_vrepli_b:
2313   case Intrinsic::loongarch_lsx_vrepli_h:
2314   case Intrinsic::loongarch_lsx_vrepli_w:
2315   case Intrinsic::loongarch_lsx_vrepli_d:
2316   case Intrinsic::loongarch_lasx_xvrepli_b:
2317   case Intrinsic::loongarch_lasx_xvrepli_h:
2318   case Intrinsic::loongarch_lasx_xvrepli_w:
2319   case Intrinsic::loongarch_lasx_xvrepli_d:
2320     return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2321   case Intrinsic::loongarch_lsx_vldi:
2322   case Intrinsic::loongarch_lasx_xvldi:
2323     return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2324   }
2325 }
2326 
2327 // Helper function that emits an error message for intrinsics with a chain
2328 // and returns the merge values of a UNDEF and the chain.
2329 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2330                                                   StringRef ErrorMsg,
2331                                                   SelectionDAG &DAG) {
2332   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2333   return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2334                             SDLoc(Op));
2335 }
2336 
2337 SDValue
2338 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2339                                                 SelectionDAG &DAG) const {
2340   SDLoc DL(Op);
2341   MVT GRLenVT = Subtarget.getGRLenVT();
2342   EVT VT = Op.getValueType();
2343   SDValue Chain = Op.getOperand(0);
2344   const StringRef ErrorMsgOOR = "argument out of range";
2345   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2346   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2347 
2348   switch (Op.getConstantOperandVal(1)) {
2349   default:
2350     return Op;
2351   case Intrinsic::loongarch_crc_w_b_w:
2352   case Intrinsic::loongarch_crc_w_h_w:
2353   case Intrinsic::loongarch_crc_w_w_w:
2354   case Intrinsic::loongarch_crc_w_d_w:
2355   case Intrinsic::loongarch_crcc_w_b_w:
2356   case Intrinsic::loongarch_crcc_w_h_w:
2357   case Intrinsic::loongarch_crcc_w_w_w:
2358   case Intrinsic::loongarch_crcc_w_d_w:
2359     return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2360   case Intrinsic::loongarch_csrrd_w:
2361   case Intrinsic::loongarch_csrrd_d: {
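         // CSR numbers are 14-bit unsigned immediates.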
2362     unsigned Imm = Op.getConstantOperandVal(2);
2363     return !isUInt<14>(Imm)
2364                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2365                : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2366                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2367   }
2368   case Intrinsic::loongarch_csrwr_w:
2369   case Intrinsic::loongarch_csrwr_d: {
2370     unsigned Imm = Op.getConstantOperandVal(3);
2371     return !isUInt<14>(Imm)
2372                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2373                : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2374                              {Chain, Op.getOperand(2),
2375                               DAG.getConstant(Imm, DL, GRLenVT)});
2376   }
2377   case Intrinsic::loongarch_csrxchg_w:
2378   case Intrinsic::loongarch_csrxchg_d: {
2379     unsigned Imm = Op.getConstantOperandVal(4);
2380     return !isUInt<14>(Imm)
2381                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2382                : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2383                              {Chain, Op.getOperand(2), Op.getOperand(3),
2384                               DAG.getConstant(Imm, DL, GRLenVT)});
2385   }
2386   case Intrinsic::loongarch_iocsrrd_d: {
2387     return DAG.getNode(
2388         LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2389         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2390   }
2391 #define IOCSRRD_CASE(NAME, NODE)                                               \
2392   case Intrinsic::loongarch_##NAME: {                                          \
2393     return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
2394                        {Chain, Op.getOperand(2)});                             \
2395   }
2396     IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2397     IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2398     IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2399 #undef IOCSRRD_CASE
2400   case Intrinsic::loongarch_cpucfg: {
2401     return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2402                        {Chain, Op.getOperand(2)});
2403   }
2404   case Intrinsic::loongarch_lddir_d: {
2405     unsigned Imm = Op.getConstantOperandVal(3);
2406     return !isUInt<8>(Imm)
2407                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2408                : Op;
2409   }
2410   case Intrinsic::loongarch_movfcsr2gr: {
2411     if (!Subtarget.hasBasicF())
2412       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2413     unsigned Imm = Op.getConstantOperandVal(2);
2414     return !isUInt<2>(Imm)
2415                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2416                : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2417                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2418   }
2419   case Intrinsic::loongarch_lsx_vld:
2420   case Intrinsic::loongarch_lsx_vldrepl_b:
2421   case Intrinsic::loongarch_lasx_xvld:
2422   case Intrinsic::loongarch_lasx_xvldrepl_b:
2423     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2424                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2425                : SDValue();
2426   case Intrinsic::loongarch_lsx_vldrepl_h:
2427   case Intrinsic::loongarch_lasx_xvldrepl_h:
2428     return !isShiftedInt<11, 1>(
2429                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2430                ? emitIntrinsicWithChainErrorMessage(
2431                      Op, "argument out of range or not a multiple of 2", DAG)
2432                : SDValue();
2433   case Intrinsic::loongarch_lsx_vldrepl_w:
2434   case Intrinsic::loongarch_lasx_xvldrepl_w:
2435     return !isShiftedInt<10, 2>(
2436                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2437                ? emitIntrinsicWithChainErrorMessage(
2438                      Op, "argument out of range or not a multiple of 4", DAG)
2439                : SDValue();
2440   case Intrinsic::loongarch_lsx_vldrepl_d:
2441   case Intrinsic::loongarch_lasx_xvldrepl_d:
2442     return !isShiftedInt<9, 3>(
2443                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2444                ? emitIntrinsicWithChainErrorMessage(
2445                      Op, "argument out of range or not a multiple of 8", DAG)
2446                : SDValue();
2447   }
2448 }
2449 
2450 // Helper function that emits an error message for intrinsics with a void
2451 // return value and returns the chain.
2452 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2453                                          SelectionDAG &DAG) {
2454 
2455   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2456   return Op.getOperand(0);
2457 }
2458 
2459 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2460                                                      SelectionDAG &DAG) const {
2461   SDLoc DL(Op);
2462   MVT GRLenVT = Subtarget.getGRLenVT();
2463   SDValue Chain = Op.getOperand(0);
2464   uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2465   SDValue Op2 = Op.getOperand(2);
2466   const StringRef ErrorMsgOOR = "argument out of range";
2467   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2468   const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2469   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2470 
2471   switch (IntrinsicEnum) {
2472   default:
2473     // TODO: Add more Intrinsics.
2474     return SDValue();
2475   case Intrinsic::loongarch_cacop_d:
2476   case Intrinsic::loongarch_cacop_w: {
2477     if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2478       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2479     if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2480       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2481     // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2482     unsigned Imm1 = Op2->getAsZExtVal();
2483     int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2484     if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2485       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2486     return Op;
2487   }
2488   case Intrinsic::loongarch_dbar: {
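         // The dbar hint is a 15-bit unsigned immediate.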
2489     unsigned Imm = Op2->getAsZExtVal();
2490     return !isUInt<15>(Imm)
2491                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2492                : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2493                              DAG.getConstant(Imm, DL, GRLenVT));
2494   }
2495   case Intrinsic::loongarch_ibar: {
2496     unsigned Imm = Op2->getAsZExtVal();
2497     return !isUInt<15>(Imm)
2498                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2499                : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2500                              DAG.getConstant(Imm, DL, GRLenVT));
2501   }
2502   case Intrinsic::loongarch_break: {
2503     unsigned Imm = Op2->getAsZExtVal();
2504     return !isUInt<15>(Imm)
2505                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2506                : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2507                              DAG.getConstant(Imm, DL, GRLenVT));
2508   }
2509   case Intrinsic::loongarch_movgr2fcsr: {
2510     if (!Subtarget.hasBasicF())
2511       return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2512     unsigned Imm = Op2->getAsZExtVal();
2513     return !isUInt<2>(Imm)
2514                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2515                : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2516                              DAG.getConstant(Imm, DL, GRLenVT),
2517                              DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2518                                          Op.getOperand(3)));
2519   }
2520   case Intrinsic::loongarch_syscall: {
2521     unsigned Imm = Op2->getAsZExtVal();
2522     return !isUInt<15>(Imm)
2523                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2524                : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2525                              DAG.getConstant(Imm, DL, GRLenVT));
2526   }
2527 #define IOCSRWR_CASE(NAME, NODE)                                               \
2528   case Intrinsic::loongarch_##NAME: {                                          \
2529     SDValue Op3 = Op.getOperand(3);                                            \
2530     return Subtarget.is64Bit()                                                 \
2531                ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
2532                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
2533                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
2534                : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
2535                              Op3);                                             \
2536   }
2537     IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2538     IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2539     IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2540 #undef IOCSRWR_CASE
2541   case Intrinsic::loongarch_iocsrwr_d: {
2542     return !Subtarget.is64Bit()
2543                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2544                : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2545                              Op2,
2546                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2547                                          Op.getOperand(3)));
2548   }
2549 #define ASRT_LE_GT_CASE(NAME)                                                  \
2550   case Intrinsic::loongarch_##NAME: {                                          \
2551     return !Subtarget.is64Bit()                                                \
2552                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
2553                : Op;                                                           \
2554   }
2555     ASRT_LE_GT_CASE(asrtle_d)
2556     ASRT_LE_GT_CASE(asrtgt_d)
2557 #undef ASRT_LE_GT_CASE
2558   case Intrinsic::loongarch_ldpte_d: {
2559     unsigned Imm = Op.getConstantOperandVal(3);
2560     return !Subtarget.is64Bit()
2561                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2562            : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2563                              : Op;
2564   }
2565   case Intrinsic::loongarch_lsx_vst:
2566   case Intrinsic::loongarch_lasx_xvst:
2567     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2568                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2569                : SDValue();
2570   case Intrinsic::loongarch_lasx_xvstelm_b:
2571     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2572             !isUInt<5>(Op.getConstantOperandVal(5)))
2573                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2574                : SDValue();
2575   case Intrinsic::loongarch_lsx_vstelm_b:
2576     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2577             !isUInt<4>(Op.getConstantOperandVal(5)))
2578                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2579                : SDValue();
2580   case Intrinsic::loongarch_lasx_xvstelm_h:
2581     return (!isShiftedInt<8, 1>(
2582                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2583             !isUInt<4>(Op.getConstantOperandVal(5)))
2584                ? emitIntrinsicErrorMessage(
2585                      Op, "argument out of range or not a multiple of 2", DAG)
2586                : SDValue();
2587   case Intrinsic::loongarch_lsx_vstelm_h:
2588     return (!isShiftedInt<8, 1>(
2589                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2590             !isUInt<3>(Op.getConstantOperandVal(5)))
2591                ? emitIntrinsicErrorMessage(
2592                      Op, "argument out of range or not a multiple of 2", DAG)
2593                : SDValue();
2594   case Intrinsic::loongarch_lasx_xvstelm_w:
2595     return (!isShiftedInt<8, 2>(
2596                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2597             !isUInt<3>(Op.getConstantOperandVal(5)))
2598                ? emitIntrinsicErrorMessage(
2599                      Op, "argument out of range or not a multiple of 4", DAG)
2600                : SDValue();
2601   case Intrinsic::loongarch_lsx_vstelm_w:
2602     return (!isShiftedInt<8, 2>(
2603                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2604             !isUInt<2>(Op.getConstantOperandVal(5)))
2605                ? emitIntrinsicErrorMessage(
2606                      Op, "argument out of range or not a multiple of 4", DAG)
2607                : SDValue();
2608   case Intrinsic::loongarch_lasx_xvstelm_d:
2609     return (!isShiftedInt<8, 3>(
2610                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2611             !isUInt<2>(Op.getConstantOperandVal(5)))
2612                ? emitIntrinsicErrorMessage(
2613                      Op, "argument out of range or not a multiple of 8", DAG)
2614                : SDValue();
2615   case Intrinsic::loongarch_lsx_vstelm_d:
2616     return (!isShiftedInt<8, 3>(
2617                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2618             !isUInt<1>(Op.getConstantOperandVal(5)))
2619                ? emitIntrinsicErrorMessage(
2620                      Op, "argument out of range or not a multiple of 8", DAG)
2621                : SDValue();
2622   }
2623 }
2624 
2625 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2626                                                      SelectionDAG &DAG) const {
2627   SDLoc DL(Op);
2628   SDValue Lo = Op.getOperand(0);
2629   SDValue Hi = Op.getOperand(1);
2630   SDValue Shamt = Op.getOperand(2);
2631   EVT VT = Lo.getValueType();
2632 
2633   // if Shamt-GRLen < 0: // Shamt < GRLen
2634   //   Lo = Lo << Shamt
2635   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2636   // else:
2637   //   Hi = Lo << (Shamt-GRLen)
2638   //   Lo = 0
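       // For example (an illustrative sketch, GRLen = 32): Shamt = 4 yields
       //   Lo = Lo << 4
       //   Hi = (Hi << 4) | (Lo >>u 28)
       // while Shamt = 40 yields Hi = Lo << 8 and Lo = 0.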
2639 
2640   SDValue Zero = DAG.getConstant(0, DL, VT);
2641   SDValue One = DAG.getConstant(1, DL, VT);
2642   SDValue MinusGRLen =
2643       DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2644   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2645   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2646   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2647 
2648   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2649   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2650   SDValue ShiftRightLo =
2651       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2652   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2653   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2654   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2655 
2656   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2657 
2658   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2659   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2660 
2661   SDValue Parts[2] = {Lo, Hi};
2662   return DAG.getMergeValues(Parts, DL);
2663 }
2664 
2665 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2666                                                       SelectionDAG &DAG,
2667                                                       bool IsSRA) const {
2668   SDLoc DL(Op);
2669   SDValue Lo = Op.getOperand(0);
2670   SDValue Hi = Op.getOperand(1);
2671   SDValue Shamt = Op.getOperand(2);
2672   EVT VT = Lo.getValueType();
2673 
2674   // SRA expansion:
2675   //   if Shamt-GRLen < 0: // Shamt < GRLen
2676   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
2677   //     Hi = Hi >>s Shamt
2678   //   else:
2679   //     Lo = Hi >>s (Shamt-GRLen);
2680   //     Hi = Hi >>s (GRLen-1)
2681   //
2682   // SRL expansion:
2683   //   if Shamt-GRLen < 0: // Shamt < GRLen
2684   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
2685   //     Hi = Hi >>u Shamt
2686   //   else:
2687   //     Lo = Hi >>u (Shamt-GRLen);
2688   //     Hi = 0;
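       // For example (an illustrative sketch, GRLen = 32): SRL with Shamt = 4
       // yields
       //   Lo = (Lo >>u 4) | (Hi << 28)
       //   Hi = Hi >>u 4
       // while Shamt = 40 yields Lo = Hi >>u 8 and Hi = 0.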
2689 
2690   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2691 
2692   SDValue Zero = DAG.getConstant(0, DL, VT);
2693   SDValue One = DAG.getConstant(1, DL, VT);
2694   SDValue MinusGRLen =
2695       DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2696   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2697   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2698   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2699 
2700   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2701   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2702   SDValue ShiftLeftHi =
2703       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2704   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2705   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2706   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2707   SDValue HiFalse =
2708       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2709 
2710   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2711 
2712   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2713   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2714 
2715   SDValue Parts[2] = {Lo, Hi};
2716   return DAG.getMergeValues(Parts, DL);
2717 }
2718 
2719 // Returns the opcode of the target-specific SDNode that implements the 32-bit
2720 // form of the given Opcode.
2721 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2722   switch (Opcode) {
2723   default:
2724     llvm_unreachable("Unexpected opcode");
2725   case ISD::SDIV:
2726     return LoongArchISD::DIV_W;
2727   case ISD::UDIV:
2728     return LoongArchISD::DIV_WU;
2729   case ISD::SREM:
2730     return LoongArchISD::MOD_W;
2731   case ISD::UREM:
2732     return LoongArchISD::MOD_WU;
2733   case ISD::SHL:
2734     return LoongArchISD::SLL_W;
2735   case ISD::SRA:
2736     return LoongArchISD::SRA_W;
2737   case ISD::SRL:
2738     return LoongArchISD::SRL_W;
2739   case ISD::ROTL:
2740   case ISD::ROTR:
2741     return LoongArchISD::ROTR_W;
2742   case ISD::CTTZ:
2743     return LoongArchISD::CTZ_W;
2744   case ISD::CTLZ:
2745     return LoongArchISD::CLZ_W;
2746   }
2747 }
2748 
2749 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2750 // node. Because i8/i16/i32 aren't legal types for LA64, these operations would
2751 // otherwise be promoted to i64, making it difficult to select the
2752 // SLL_W/.../*W instructions later on, because the fact that the operation was
2753 // originally of type i8/i16/i32 is lost.
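     // For example (an illustrative sketch): an (shl i32 %a, %b) node on LA64 is
     // rewritten below as
     //   (trunc i32 (SLL_W i64 (any_extend %a), (any_extend %b)))
     // and an i32 ROTL becomes ROTR_W with its shift amount rewritten as 32 - %b.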
2754 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2755                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
2756   SDLoc DL(N);
2757   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2758   SDValue NewOp0, NewRes;
2759 
2760   switch (NumOp) {
2761   default:
2762     llvm_unreachable("Unexpected NumOp");
2763   case 1: {
2764     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2765     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2766     break;
2767   }
2768   case 2: {
2769     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2770     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2771     if (N->getOpcode() == ISD::ROTL) {
2772       SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2773       NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2774     }
2775     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2776     break;
2777   }
2778     // TODO: Handle more NumOp values.
2779   }
2780 
2781   // ReplaceNodeResults requires we maintain the same type for the return
2782   // value.
2783   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2784 }
2785 
2786 // Converts the given 32-bit operation to an i64 operation with sign-extension
2787 // semantics, reducing the number of sign-extension instructions needed.
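     // For example (an illustrative sketch): (add i32 %a, %b) becomes
     //   (trunc i32 (sext_inreg i64 (add i64 (any_extend %a), (any_extend %b)),
     //               i32))
     // which on LA64 can then be selected as a single add.w, whose result is
     // already sign-extended.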
2788 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2789   SDLoc DL(N);
2790   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2791   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2792   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2793   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2794                                DAG.getValueType(MVT::i32));
2795   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2796 }
2797 
2798 // Helper function that emits an error message for intrinsics with/without a
2799 // chain and pushes UNDEF (plus the chain, if present) as the results.
2800 static void emitErrorAndReplaceIntrinsicResults(
2801     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2802     StringRef ErrorMsg, bool WithChain = true) {
2803   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2804   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2805   if (!WithChain)
2806     return;
2807   Results.push_back(N->getOperand(0));
2808 }
2809 
2810 template <unsigned N>
2811 static void
2812 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2813                          SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2814                          unsigned ResOp) {
2815   const StringRef ErrorMsgOOR = "argument out of range";
2816   unsigned Imm = Node->getConstantOperandVal(2);
2817   if (!isUInt<N>(Imm)) {
2818     emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2819                                         /*WithChain=*/false);
2820     return;
2821   }
2822   SDLoc DL(Node);
2823   SDValue Vec = Node->getOperand(1);
2824 
2825   SDValue PickElt =
2826       DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2827                   DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2828                   DAG.getValueType(Vec.getValueType().getVectorElementType()));
2829   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2830                                 PickElt.getValue(0)));
2831 }
2832 
2833 static void replaceVecCondBranchResults(SDNode *N,
2834                                         SmallVectorImpl<SDValue> &Results,
2835                                         SelectionDAG &DAG,
2836                                         const LoongArchSubtarget &Subtarget,
2837                                         unsigned ResOp) {
2838   SDLoc DL(N);
2839   SDValue Vec = N->getOperand(1);
2840 
2841   SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2842   Results.push_back(
2843       DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2844 }
2845 
2846 static void
2847 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2848                                  SelectionDAG &DAG,
2849                                  const LoongArchSubtarget &Subtarget) {
2850   switch (N->getConstantOperandVal(0)) {
2851   default:
2852     llvm_unreachable("Unexpected Intrinsic.");
2853   case Intrinsic::loongarch_lsx_vpickve2gr_b:
2854     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2855                                 LoongArchISD::VPICK_SEXT_ELT);
2856     break;
2857   case Intrinsic::loongarch_lsx_vpickve2gr_h:
2858   case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2859     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2860                                 LoongArchISD::VPICK_SEXT_ELT);
2861     break;
2862   case Intrinsic::loongarch_lsx_vpickve2gr_w:
2863     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2864                                 LoongArchISD::VPICK_SEXT_ELT);
2865     break;
2866   case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2867     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2868                                 LoongArchISD::VPICK_ZEXT_ELT);
2869     break;
2870   case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2871   case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2872     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2873                                 LoongArchISD::VPICK_ZEXT_ELT);
2874     break;
2875   case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2876     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2877                                 LoongArchISD::VPICK_ZEXT_ELT);
2878     break;
2879   case Intrinsic::loongarch_lsx_bz_b:
2880   case Intrinsic::loongarch_lsx_bz_h:
2881   case Intrinsic::loongarch_lsx_bz_w:
2882   case Intrinsic::loongarch_lsx_bz_d:
2883   case Intrinsic::loongarch_lasx_xbz_b:
2884   case Intrinsic::loongarch_lasx_xbz_h:
2885   case Intrinsic::loongarch_lasx_xbz_w:
2886   case Intrinsic::loongarch_lasx_xbz_d:
2887     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2888                                 LoongArchISD::VALL_ZERO);
2889     break;
2890   case Intrinsic::loongarch_lsx_bz_v:
2891   case Intrinsic::loongarch_lasx_xbz_v:
2892     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2893                                 LoongArchISD::VANY_ZERO);
2894     break;
2895   case Intrinsic::loongarch_lsx_bnz_b:
2896   case Intrinsic::loongarch_lsx_bnz_h:
2897   case Intrinsic::loongarch_lsx_bnz_w:
2898   case Intrinsic::loongarch_lsx_bnz_d:
2899   case Intrinsic::loongarch_lasx_xbnz_b:
2900   case Intrinsic::loongarch_lasx_xbnz_h:
2901   case Intrinsic::loongarch_lasx_xbnz_w:
2902   case Intrinsic::loongarch_lasx_xbnz_d:
2903     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2904                                 LoongArchISD::VALL_NONZERO);
2905     break;
2906   case Intrinsic::loongarch_lsx_bnz_v:
2907   case Intrinsic::loongarch_lasx_xbnz_v:
2908     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2909                                 LoongArchISD::VANY_NONZERO);
2910     break;
2911   }
2912 }
2913 
2914 static void replaceCMP_XCHG_128Results(SDNode *N,
2915                                        SmallVectorImpl<SDValue> &Results,
2916                                        SelectionDAG &DAG) {
2917   assert(N->getValueType(0) == MVT::i128 &&
2918          "AtomicCmpSwap on types less than 128 should be legal");
2919   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2920 
2921   unsigned Opcode;
2922   switch (MemOp->getMergedOrdering()) {
2923   case AtomicOrdering::Acquire:
2924   case AtomicOrdering::AcquireRelease:
2925   case AtomicOrdering::SequentiallyConsistent:
2926     Opcode = LoongArch::PseudoCmpXchg128Acquire;
2927     break;
2928   case AtomicOrdering::Monotonic:
2929   case AtomicOrdering::Release:
2930     Opcode = LoongArch::PseudoCmpXchg128;
2931     break;
2932   default:
2933     llvm_unreachable("Unexpected ordering!");
2934   }
2935 
2936   SDLoc DL(N);
2937   auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
2938   auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
2939   SDValue Ops[] = {N->getOperand(1), CmpVal.first,  CmpVal.second,
2940                    NewVal.first,     NewVal.second, N->getOperand(0)};
2941 
2942   SDNode *CmpSwap = DAG.getMachineNode(
2943       Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
2944       Ops);
2945   DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2946   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
2947                                 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
2948   Results.push_back(SDValue(CmpSwap, 3));
2949 }
2950 
2951 void LoongArchTargetLowering::ReplaceNodeResults(
2952     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2953   SDLoc DL(N);
2954   EVT VT = N->getValueType(0);
2955   switch (N->getOpcode()) {
2956   default:
2957     llvm_unreachable("Don't know how to legalize this operation");
2958   case ISD::ADD:
2959   case ISD::SUB:
2960     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2961            "Unexpected custom legalisation");
2962     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2963     break;
2964   case ISD::SDIV:
2965   case ISD::UDIV:
2966   case ISD::SREM:
2967   case ISD::UREM:
2968     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2969            "Unexpected custom legalisation");
2970     Results.push_back(customLegalizeToWOp(N, DAG, 2,
2971                                           Subtarget.hasDiv32() && VT == MVT::i32
2972                                               ? ISD::ANY_EXTEND
2973                                               : ISD::SIGN_EXTEND));
2974     break;
2975   case ISD::SHL:
2976   case ISD::SRA:
2977   case ISD::SRL:
2978     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2979            "Unexpected custom legalisation");
2980     if (N->getOperand(1).getOpcode() != ISD::Constant) {
2981       Results.push_back(customLegalizeToWOp(N, DAG, 2));
2982       break;
2983     }
2984     break;
2985   case ISD::ROTL:
2986   case ISD::ROTR:
2987     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2988            "Unexpected custom legalisation");
2989     Results.push_back(customLegalizeToWOp(N, DAG, 2));
2990     break;
2991   case ISD::FP_TO_SINT: {
2992     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2993            "Unexpected custom legalisation");
2994     SDValue Src = N->getOperand(0);
2995     EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2996     if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2997         TargetLowering::TypeSoftenFloat) {
2998       if (Src.getValueType() == MVT::f16)
2999         Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
3000       SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
3001       Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
3002       return;
3003     }
3004     // If the FP type needs to be softened, emit a library call using the 'si'
3005     // version. If we left it to default legalization we'd end up with 'di'.
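         // For example (illustrative): f32 -> i32 is lowered to a __fixsfsi call
         // here, whereas default legalization of the promoted result would call
         // __fixsfdi and then truncate.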
3006     RTLIB::Libcall LC;
3007     LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
3008     MakeLibCallOptions CallOptions;
3009     EVT OpVT = Src.getValueType();
3010     CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
3011     SDValue Chain = SDValue();
3012     SDValue Result;
3013     std::tie(Result, Chain) =
3014         makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
3015     Results.push_back(Result);
3016     break;
3017   }
3018   case ISD::BITCAST: {
3019     SDValue Src = N->getOperand(0);
3020     EVT SrcVT = Src.getValueType();
3021     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
3022         Subtarget.hasBasicF()) {
3023       SDValue Dst =
3024           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
3025       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
3026     }
3027     break;
3028   }
3029   case ISD::FP_TO_UINT: {
3030     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3031            "Unexpected custom legalisation");
3032     auto &TLI = DAG.getTargetLoweringInfo();
3033     SDValue Tmp1, Tmp2;
3034     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
3035     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
3036     break;
3037   }
3038   case ISD::BSWAP: {
3039     SDValue Src = N->getOperand(0);
3040     assert((VT == MVT::i16 || VT == MVT::i32) &&
3041            "Unexpected custom legalization");
3042     MVT GRLenVT = Subtarget.getGRLenVT();
3043     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3044     SDValue Tmp;
3045     switch (VT.getSizeInBits()) {
3046     default:
3047       llvm_unreachable("Unexpected operand width");
3048     case 16:
3049       Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
3050       break;
3051     case 32:
3052       // Only LA64 will get here, due to the size mismatch between VT and
3053       // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
3054       Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
3055       break;
3056     }
3057     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3058     break;
3059   }
3060   case ISD::BITREVERSE: {
3061     SDValue Src = N->getOperand(0);
3062     assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
3063            "Unexpected custom legalization");
3064     MVT GRLenVT = Subtarget.getGRLenVT();
3065     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3066     SDValue Tmp;
3067     switch (VT.getSizeInBits()) {
3068     default:
3069       llvm_unreachable("Unexpected operand width");
3070     case 8:
3071       Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3072       break;
3073     case 32:
3074       Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3075       break;
3076     }
3077     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3078     break;
3079   }
3080   case ISD::CTLZ:
3081   case ISD::CTTZ: {
3082     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3083            "Unexpected custom legalisation");
3084     Results.push_back(customLegalizeToWOp(N, DAG, 1));
3085     break;
3086   }
3087   case ISD::INTRINSIC_W_CHAIN: {
3088     SDValue Chain = N->getOperand(0);
3089     SDValue Op2 = N->getOperand(2);
3090     MVT GRLenVT = Subtarget.getGRLenVT();
3091     const StringRef ErrorMsgOOR = "argument out of range";
3092     const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3093     const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3094 
3095     switch (N->getConstantOperandVal(1)) {
3096     default:
3097       llvm_unreachable("Unexpected Intrinsic.");
3098     case Intrinsic::loongarch_movfcsr2gr: {
3099       if (!Subtarget.hasBasicF()) {
3100         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3101         return;
3102       }
3103       unsigned Imm = Op2->getAsZExtVal();
3104       if (!isUInt<2>(Imm)) {
3105         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3106         return;
3107       }
3108       SDValue MOVFCSR2GRResults = DAG.getNode(
3109           LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3110           {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3111       Results.push_back(
3112           DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3113       Results.push_back(MOVFCSR2GRResults.getValue(1));
3114       break;
3115     }
3116 #define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
3117   case Intrinsic::loongarch_##NAME: {                                          \
3118     SDValue NODE = DAG.getNode(                                                \
3119         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
3120         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
3121          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
3122     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
3123     Results.push_back(NODE.getValue(1));                                       \
3124     break;                                                                     \
3125   }
3126       CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3127       CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3128       CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3129       CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3130       CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3131       CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3132 #undef CRC_CASE_EXT_BINARYOP
3133 
3134 #define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
3135   case Intrinsic::loongarch_##NAME: {                                          \
3136     SDValue NODE = DAG.getNode(                                                \
3137         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
3138         {Chain, Op2,                                                           \
3139          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
3140     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
3141     Results.push_back(NODE.getValue(1));                                       \
3142     break;                                                                     \
3143   }
3144       CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3145       CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3146 #undef CRC_CASE_EXT_UNARYOP
3147 #define CSR_CASE(ID)                                                           \
3148   case Intrinsic::loongarch_##ID: {                                            \
3149     if (!Subtarget.is64Bit())                                                  \
3150       emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
3151     break;                                                                     \
3152   }
3153       CSR_CASE(csrrd_d);
3154       CSR_CASE(csrwr_d);
3155       CSR_CASE(csrxchg_d);
3156       CSR_CASE(iocsrrd_d);
3157 #undef CSR_CASE
3158     case Intrinsic::loongarch_csrrd_w: {
3159       unsigned Imm = Op2->getAsZExtVal();
3160       if (!isUInt<14>(Imm)) {
3161         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3162         return;
3163       }
3164       SDValue CSRRDResults =
3165           DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3166                       {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3167       Results.push_back(
3168           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3169       Results.push_back(CSRRDResults.getValue(1));
3170       break;
3171     }
3172     case Intrinsic::loongarch_csrwr_w: {
3173       unsigned Imm = N->getConstantOperandVal(3);
3174       if (!isUInt<14>(Imm)) {
3175         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3176         return;
3177       }
3178       SDValue CSRWRResults =
3179           DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3180                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3181                        DAG.getConstant(Imm, DL, GRLenVT)});
3182       Results.push_back(
3183           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3184       Results.push_back(CSRWRResults.getValue(1));
3185       break;
3186     }
3187     case Intrinsic::loongarch_csrxchg_w: {
3188       unsigned Imm = N->getConstantOperandVal(4);
3189       if (!isUInt<14>(Imm)) {
3190         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3191         return;
3192       }
3193       SDValue CSRXCHGResults = DAG.getNode(
3194           LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3195           {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3196            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3197            DAG.getConstant(Imm, DL, GRLenVT)});
3198       Results.push_back(
3199           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3200       Results.push_back(CSRXCHGResults.getValue(1));
3201       break;
3202     }
3203 #define IOCSRRD_CASE(NAME, NODE)                                               \
3204   case Intrinsic::loongarch_##NAME: {                                          \
3205     SDValue IOCSRRDResults =                                                   \
3206         DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
3207                     {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3208     Results.push_back(                                                         \
3209         DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
3210     Results.push_back(IOCSRRDResults.getValue(1));                             \
3211     break;                                                                     \
3212   }
3213       IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3214       IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3215       IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3216 #undef IOCSRRD_CASE
3217     case Intrinsic::loongarch_cpucfg: {
3218       SDValue CPUCFGResults =
3219           DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3220                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3221       Results.push_back(
3222           DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3223       Results.push_back(CPUCFGResults.getValue(1));
3224       break;
3225     }
3226     case Intrinsic::loongarch_lddir_d: {
3227       if (!Subtarget.is64Bit()) {
3228         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3229         return;
3230       }
3231       break;
3232     }
3233     }
3234     break;
3235   }
3236   case ISD::READ_REGISTER: {
3237     if (Subtarget.is64Bit())
3238       DAG.getContext()->emitError(
3239           "On LA64, only 64-bit registers can be read.");
3240     else
3241       DAG.getContext()->emitError(
3242           "On LA32, only 32-bit registers can be read.");
3243     Results.push_back(DAG.getUNDEF(VT));
3244     Results.push_back(N->getOperand(0));
3245     break;
3246   }
3247   case ISD::INTRINSIC_WO_CHAIN: {
3248     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3249     break;
3250   }
3251   case ISD::LROUND: {
3252     SDValue Op0 = N->getOperand(0);
3253     EVT OpVT = Op0.getValueType();
3254     RTLIB::Libcall LC =
3255         OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3256     MakeLibCallOptions CallOptions;
3257     CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3258     SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3259     Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3260     Results.push_back(Result);
3261     break;
3262   }
3263   case ISD::ATOMIC_CMP_SWAP: {
3264     replaceCMP_XCHG_128Results(N, Results, DAG);
3265     break;
3266   }
3267   }
3268 }
3269 
3270 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3271                                  TargetLowering::DAGCombinerInfo &DCI,
3272                                  const LoongArchSubtarget &Subtarget) {
3273   if (DCI.isBeforeLegalizeOps())
3274     return SDValue();
3275 
3276   SDValue FirstOperand = N->getOperand(0);
3277   SDValue SecondOperand = N->getOperand(1);
3278   unsigned FirstOperandOpc = FirstOperand.getOpcode();
3279   EVT ValTy = N->getValueType(0);
3280   SDLoc DL(N);
3281   uint64_t lsb, msb;
3282   unsigned SMIdx, SMLen;
3283   ConstantSDNode *CN;
3284   SDValue NewOperand;
3285   MVT GRLenVT = Subtarget.getGRLenVT();
3286 
3287   // Op's second operand must be a shifted mask.
3288   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3289       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3290     return SDValue();
3291 
3292   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3293     // Pattern match BSTRPICK.
3294     //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3295     //  => BSTRPICK $dst, $src, msb, lsb
3296     //  where msb = lsb + len - 1
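         // For example (illustrative):
         //   $dst = and (srl $src, 8), 255
         //   => BSTRPICK $dst, $src, 15, 8   (lsb = 8, len = 8, msb = 15)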
3297 
3298     // The second operand of the shift must be an immediate.
3299     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3300       return SDValue();
3301 
3302     lsb = CN->getZExtValue();
3303 
3304     // Return if the shifted mask does not start at bit 0 or the sum of its
3305     // length and lsb exceeds the word's size.
3306     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3307       return SDValue();
3308 
3309     NewOperand = FirstOperand.getOperand(0);
3310   } else {
3311     // Pattern match BSTRPICK.
3312     //  $dst = and $src, (2**len - 1), if len > 12
3313     //  => BSTRPICK $dst, $src, msb, lsb
3314     //  where lsb = 0 and msb = len - 1
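         // For example (illustrative):
         //   $dst = and $src, 0xffff        (len = 16 > 12)
         //   => BSTRPICK $dst, $src, 15, 0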
3315 
3316     // If the mask is <= 0xfff, andi can be used instead.
3317     if (CN->getZExtValue() <= 0xfff)
3318       return SDValue();
3319 
3320     // Return if the MSB would exceed the word's size.
3321     if (SMIdx + SMLen > ValTy.getSizeInBits())
3322       return SDValue();
3323 
3324     if (SMIdx > 0) {
3325       // Omit if the constant has more than 2 uses. This is a conservative
3326       // decision. Whether it is a win depends on the HW microarchitecture.
3327       // However it should always be better for 1 and 2 uses.
3328       if (CN->use_size() > 2)
3329         return SDValue();
3330       // Return if the constant can be composed by a single LU12I.W.
3331       if ((CN->getZExtValue() & 0xfff) == 0)
3332         return SDValue();
3333       // Return if the constant can be composed by a single ADDI with
3334       // the zero register.
3335       if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3336         return SDValue();
3337     }
3338 
3339     lsb = SMIdx;
3340     NewOperand = FirstOperand;
3341   }
3342 
3343   msb = lsb + SMLen - 1;
3344   SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3345                             DAG.getConstant(msb, DL, GRLenVT),
3346                             DAG.getConstant(lsb, DL, GRLenVT));
3347   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3348     return NR0;
3349   // Try to optimize to
3350   //   bstrpick $Rd, $Rs, msb, lsb
3351   //   slli     $Rd, $Rd, lsb
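       // For example (illustrative): and $src, 0x3ffc
       //   => bstrpick $dst, $src, 13, 2 ; slli $dst, $dst, 2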
3352   return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3353                      DAG.getConstant(lsb, DL, GRLenVT));
3354 }
3355 
3356 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3357                                  TargetLowering::DAGCombinerInfo &DCI,
3358                                  const LoongArchSubtarget &Subtarget) {
3359   if (DCI.isBeforeLegalizeOps())
3360     return SDValue();
3361 
3362   // $dst = srl (and $src, Mask), Shamt
3363   // =>
3364   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3365   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3366   //
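       // For example (illustrative): srl (and $src, 0xff00), 8
       //   => BSTRPICK $dst, $src, 15, 8   (MaskIdx = 8, MaskLen = 8, Shamt = 8)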
3367 
3368   SDValue FirstOperand = N->getOperand(0);
3369   ConstantSDNode *CN;
3370   EVT ValTy = N->getValueType(0);
3371   SDLoc DL(N);
3372   MVT GRLenVT = Subtarget.getGRLenVT();
3373   unsigned MaskIdx, MaskLen;
3374   uint64_t Shamt;
3375 
3376   // The first operand must be an AND and the second operand of the AND must be
3377   // a shifted mask.
3378   if (FirstOperand.getOpcode() != ISD::AND ||
3379       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3380       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3381     return SDValue();
3382 
3383   // The second operand (shift amount) must be an immediate.
3384   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3385     return SDValue();
3386 
3387   Shamt = CN->getZExtValue();
3388   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3389     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3390                        FirstOperand->getOperand(0),
3391                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3392                        DAG.getConstant(Shamt, DL, GRLenVT));
3393 
3394   return SDValue();
3395 }
3396 
3397 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3398                                 TargetLowering::DAGCombinerInfo &DCI,
3399                                 const LoongArchSubtarget &Subtarget) {
3400   MVT GRLenVT = Subtarget.getGRLenVT();
3401   EVT ValTy = N->getValueType(0);
3402   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3403   ConstantSDNode *CN0, *CN1;
3404   SDLoc DL(N);
3405   unsigned ValBits = ValTy.getSizeInBits();
3406   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3407   unsigned Shamt;
3408   bool SwapAndRetried = false;
3409 
3410   if (DCI.isBeforeLegalizeOps())
3411     return SDValue();
3412 
3413   if (ValBits != 32 && ValBits != 64)
3414     return SDValue();
3415 
3416 Retry:
3417   // 1st pattern to match BSTRINS:
3418   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
3419   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3420   //  =>
3421   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
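       // For example (an illustrative 32-bit case):
       //   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0x0000ff00)
       //   => R = BSTRINS X, Y, 15, 8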
3422   if (N0.getOpcode() == ISD::AND &&
3423       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3424       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3425       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3426       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3427       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3428       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3429       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3430       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3431       (MaskIdx0 + MaskLen0 <= ValBits)) {
3432     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3433     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3434                        N1.getOperand(0).getOperand(0),
3435                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3436                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
3437   }
3438 
3439   // 2nd pattern to match BSTRINS:
3440   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
3441   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3442   //  =>
3443   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3444   if (N0.getOpcode() == ISD::AND &&
3445       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3446       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3447       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3448       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3449       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3450       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3451       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3452       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3453       (MaskIdx0 + MaskLen0 <= ValBits)) {
3454     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3455     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3456                        N1.getOperand(0).getOperand(0),
3457                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3458                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
3459   }
3460 
3461   // 3rd pattern to match BSTRINS:
3462   //  R = or (and X, mask0), (and Y, mask1)
3463   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3464   //  =>
3465   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3466   //  where msb = lsb + size - 1
3467   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3468       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3469       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3470       (MaskIdx0 + MaskLen0 <= 64) &&
3471       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3472       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3473     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3474     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3475                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3476                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3477                        DAG.getConstant(ValBits == 32
3478                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3479                                            : (MaskIdx0 + MaskLen0 - 1),
3480                                        DL, GRLenVT),
3481                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
3482   }
3483 
3484   // 4th pattern to match BSTRINS:
3485   //  R = or (and X, mask), (shl Y, shamt)
3486   //  where mask = (2**shamt - 1)
3487   //  =>
3488   //  R = BSTRINS X, Y, ValBits - 1, shamt
3489   //  where ValBits = 32 or 64
3490   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3491       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3492       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3493       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3494       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3495       (MaskIdx0 + MaskLen0 <= ValBits)) {
3496     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3497     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3498                        N1.getOperand(0),
3499                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
3500                        DAG.getConstant(Shamt, DL, GRLenVT));
3501   }
3502 
3503   // 5th pattern to match BSTRINS:
3504   //  R = or (and X, mask), const
3505   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
3506   //  =>
3507   //  R = BSTRINS X, (const >> lsb), msb, lsb
3508   //  where msb = lsb + size - 1
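       // For example (an illustrative 32-bit case):
       //   R = or (and X, 0xffff00ff), 0x00002b00
       //   => R = BSTRINS X, 0x2b, 15, 8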
3509   if (N0.getOpcode() == ISD::AND &&
3510       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3511       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3512       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3513       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3514     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3515     return DAG.getNode(
3516         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3517         DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3518         DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3519                                       : (MaskIdx0 + MaskLen0 - 1),
3520                         DL, GRLenVT),
3521         DAG.getConstant(MaskIdx0, DL, GRLenVT));
3522   }
3523 
3524   // 6th pattern.
3525   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3526   // by the incoming bits are known to be zero.
3527   // =>
3528   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3529   //
3530 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3531   // pattern is more common than the 1st. So we put the 1st before the 6th in
3532   // order to match as many nodes as possible.
3533   ConstantSDNode *CNMask, *CNShamt;
3534   unsigned MaskIdx, MaskLen;
3535   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3536       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3537       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3538       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3539       CNShamt->getZExtValue() + MaskLen <= ValBits) {
3540     Shamt = CNShamt->getZExtValue();
3541     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3542     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3543       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3544       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3545                          N1.getOperand(0).getOperand(0),
3546                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3547                          DAG.getConstant(Shamt, DL, GRLenVT));
3548     }
3549   }
3550 
3551   // 7th pattern.
3552   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3553   // overwritten by the incoming bits are known to be zero.
3554   // =>
3555   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3556   //
3557   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3558   // before the 7th in order to match as many nodes as possible.
3559   if (N1.getOpcode() == ISD::AND &&
3560       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3561       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3562       N1.getOperand(0).getOpcode() == ISD::SHL &&
3563       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3564       CNShamt->getZExtValue() == MaskIdx) {
3565     APInt ShMask(ValBits, CNMask->getZExtValue());
3566     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3567       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3568       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3569                          N1.getOperand(0).getOperand(0),
3570                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3571                          DAG.getConstant(MaskIdx, DL, GRLenVT));
3572     }
3573   }
3574 
3575   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3576   if (!SwapAndRetried) {
3577     std::swap(N0, N1);
3578     SwapAndRetried = true;
3579     goto Retry;
3580   }
3581 
3582   SwapAndRetried = false;
3583 Retry2:
3584   // 8th pattern.
3585   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3586   // the incoming bits are known to be zero.
3587   // =>
3588   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3589   //
3590   // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3591 // we put it here in order to match as many nodes as possible or generate fewer
3592   // instructions.
3593   if (N1.getOpcode() == ISD::AND &&
3594       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3595       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3596     APInt ShMask(ValBits, CNMask->getZExtValue());
3597     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3598       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3599       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3600                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3601                                      N1->getOperand(0),
3602                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
3603                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3604                          DAG.getConstant(MaskIdx, DL, GRLenVT));
3605     }
3606   }
3607   // Swap N0/N1 and retry.
3608   if (!SwapAndRetried) {
3609     std::swap(N0, N1);
3610     SwapAndRetried = true;
3611     goto Retry2;
3612   }
3613 
3614   return SDValue();
3615 }
3616 
3617 static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3618   ExtType = ISD::NON_EXTLOAD;
3619 
3620   switch (V.getNode()->getOpcode()) {
3621   case ISD::LOAD: {
3622     LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3623     if ((LoadNode->getMemoryVT() == MVT::i8) ||
3624         (LoadNode->getMemoryVT() == MVT::i16)) {
3625       ExtType = LoadNode->getExtensionType();
3626       return true;
3627     }
3628     return false;
3629   }
3630   case ISD::AssertSext: {
3631     VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3632     if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3633       ExtType = ISD::SEXTLOAD;
3634       return true;
3635     }
3636     return false;
3637   }
3638   case ISD::AssertZext: {
3639     VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3640     if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3641       ExtType = ISD::ZEXTLOAD;
3642       return true;
3643     }
3644     return false;
3645   }
3646   default:
3647     return false;
3648   }
3649 
3650   return false;
3651 }
3652 
3653 // Eliminate redundant truncation and zero-extension nodes.
3654 // * Case 1:
3655 //  +------------+ +------------+ +------------+
3656 //  |   Input1   | |   Input2   | |     CC     |
3657 //  +------------+ +------------+ +------------+
3658 //         |              |              |
3659 //         V              V              +----+
3660 //  +------------+ +------------+             |
3661 //  |  TRUNCATE  | |  TRUNCATE  |             |
3662 //  +------------+ +------------+             |
3663 //         |              |                   |
3664 //         V              V                   |
3665 //  +------------+ +------------+             |
3666 //  |  ZERO_EXT  | |  ZERO_EXT  |             |
3667 //  +------------+ +------------+             |
3668 //         |              |                   |
3669 //         |              +-------------+     |
3670 //         V              V             |     |
3671 //        +----------------+            |     |
3672 //        |      AND       |            |     |
3673 //        +----------------+            |     |
3674 //                |                     |     |
3675 //                +---------------+     |     |
3676 //                                |     |     |
3677 //                                V     V     V
3678 //                               +-------------+
3679 //                               |     CMP     |
3680 //                               +-------------+
3681 // * Case 2:
3682 //  +------------+ +------------+ +-------------+ +------------+ +------------+
3683 //  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
3684 //  +------------+ +------------+ +-------------+ +------------+ +------------+
3685 //         |              |             |               |               |
3686 //         V              |             |               |               |
3687 //  +------------+        |             |               |               |
3688 //  |     XOR    |<---------------------+               |               |
3689 //  +------------+        |                             |               |
3690 //         |              |                             |               |
3691 //         V              V             +---------------+               |
3692 //  +------------+ +------------+       |                               |
3693 //  |  TRUNCATE  | |  TRUNCATE  |       |     +-------------------------+
3694 //  +------------+ +------------+       |     |
3695 //         |              |             |     |
3696 //         V              V             |     |
3697 //  +------------+ +------------+       |     |
3698 //  |  ZERO_EXT  | |  ZERO_EXT  |       |     |
3699 //  +------------+ +------------+       |     |
3700 //         |              |             |     |
3701 //         V              V             |     |
3702 //        +----------------+            |     |
3703 //        |      AND       |            |     |
3704 //        +----------------+            |     |
3705 //                |                     |     |
3706 //                +---------------+     |     |
3707 //                                |     |     |
3708 //                                V     V     V
3709 //                               +-------------+
3710 //                               |     CMP     |
3711 //                               +-------------+
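     // For example (an illustrative sketch of case 1), where A and B are the
     // narrow (i8/i16) extending loads:
     //   (setcc (and (zext (trunc A)), (zext (trunc B))), (zext (trunc B)), CC)
     //   --> (setcc (and A, B), B, CC)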
3712 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3713                                    TargetLowering::DAGCombinerInfo &DCI,
3714                                    const LoongArchSubtarget &Subtarget) {
3715   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3716 
3717   SDNode *AndNode = N->getOperand(0).getNode();
3718   if (AndNode->getOpcode() != ISD::AND)
3719     return SDValue();
3720 
3721   SDValue AndInputValue2 = AndNode->getOperand(1);
3722   if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3723     return SDValue();
3724 
3725   SDValue CmpInputValue = N->getOperand(1);
3726   SDValue AndInputValue1 = AndNode->getOperand(0);
3727   if (AndInputValue1.getOpcode() == ISD::XOR) {
3728     if (CC != ISD::SETEQ && CC != ISD::SETNE)
3729       return SDValue();
3730     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3731     if (!CN || CN->getSExtValue() != -1)
3732       return SDValue();
3733     CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3734     if (!CN || CN->getSExtValue() != 0)
3735       return SDValue();
3736     AndInputValue1 = AndInputValue1.getOperand(0);
3737     if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3738       return SDValue();
3739   } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3740     if (AndInputValue2 != CmpInputValue)
3741       return SDValue();
3742   } else {
3743     return SDValue();
3744   }
3745 
3746   SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3747   if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3748     return SDValue();
3749 
3750   SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3751   if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3752     return SDValue();
3753 
3754   SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3755   SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3756   ISD::LoadExtType ExtType1;
3757   ISD::LoadExtType ExtType2;
3758 
3759   if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3760       !checkValueWidth(TruncInputValue2, ExtType2))
3761     return SDValue();
3762 
3763   if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3764       AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3765     return SDValue();
3766 
3767   if ((ExtType2 != ISD::ZEXTLOAD) &&
3768       ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3769     return SDValue();
3770 
3771   // These truncation and zero-extension nodes are not necessary, remove them.
3772   SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3773                                TruncInputValue1, TruncInputValue2);
3774   SDValue NewSetCC =
3775       DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3776   DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3777   return SDValue(N, 0);
3778 }
3779 
3780 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3781 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3782                                       TargetLowering::DAGCombinerInfo &DCI,
3783                                       const LoongArchSubtarget &Subtarget) {
3784   if (DCI.isBeforeLegalizeOps())
3785     return SDValue();
3786 
3787   SDValue Src = N->getOperand(0);
3788   if (Src.getOpcode() != LoongArchISD::REVB_2W)
3789     return SDValue();
3790 
3791   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3792                      Src.getOperand(0));
3793 }
3794 
3795 template <unsigned N>
3796 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3797                                        SelectionDAG &DAG,
3798                                        const LoongArchSubtarget &Subtarget,
3799                                        bool IsSigned = false) {
3800   SDLoc DL(Node);
3801   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3802   // Check the ImmArg.
3803   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3804       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3805     DAG.getContext()->emitError(Node->getOperationName(0) +
3806                                 ": argument out of range.");
3807     return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3808   }
3809   return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3810 }
3811 
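     // Like legalizeIntrinsicImmArg, but splat the range-checked immediate
     // across every element of the result vector type.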
3812 template <unsigned N>
3813 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3814                                    SelectionDAG &DAG, bool IsSigned = false) {
3815   SDLoc DL(Node);
3816   EVT ResTy = Node->getValueType(0);
3817   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3818 
3819   // Check the ImmArg.
3820   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3821       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3822     DAG.getContext()->emitError(Node->getOperationName(0) +
3823                                 ": argument out of range.");
3824     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3825   }
3826   return DAG.getConstant(
3827       APInt(ResTy.getScalarSizeInBits(),
3828             IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3829       DL, ResTy);
3830 }
3831 
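     // Mask each element of the shift-amount vector (operand 2) down to the
     // element bit width; the vector shift and bit instructions use only the
     // low log2(EltBits) bits of each amount, so this preserves behavior.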
3832 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3833   SDLoc DL(Node);
3834   EVT ResTy = Node->getValueType(0);
3835   SDValue Vec = Node->getOperand(2);
3836   SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3837   return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3838 }
3839 
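     // Lower vbitclr to generic nodes; elementwise:
     //   dst = src0 & ~(1 << (src1 & (EltBits - 1)))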
3840 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3841   SDLoc DL(Node);
3842   EVT ResTy = Node->getValueType(0);
3843   SDValue One = DAG.getConstant(1, DL, ResTy);
3844   SDValue Bit =
3845       DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3846 
3847   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3848                      DAG.getNOT(DL, Bit, ResTy));
3849 }
3850 
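     // Immediate form of vbitclr: range-check the unsigned immediate, then
     // clear bit CImm of each element by ANDing with ~(1 << CImm). The
     // vbitseti/vbitrevi helpers below follow the same pattern with OR/XOR.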
3851 template <unsigned N>
3852 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3853   SDLoc DL(Node);
3854   EVT ResTy = Node->getValueType(0);
3855   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3856   // Check the unsigned ImmArg.
3857   if (!isUInt<N>(CImm->getZExtValue())) {
3858     DAG.getContext()->emitError(Node->getOperationName(0) +
3859                                 ": argument out of range.");
3860     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3861   }
3862 
3863   APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3864   SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3865 
3866   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3867 }
3868 
3869 template <unsigned N>
3870 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3871   SDLoc DL(Node);
3872   EVT ResTy = Node->getValueType(0);
3873   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3874   // Check the unsigned ImmArg.
3875   if (!isUInt<N>(CImm->getZExtValue())) {
3876     DAG.getContext()->emitError(Node->getOperationName(0) +
3877                                 ": argument out of range.");
3878     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3879   }
3880 
3881   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3882   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3883   return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3884 }
3885 
3886 template <unsigned N>
3887 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3888   SDLoc DL(Node);
3889   EVT ResTy = Node->getValueType(0);
3890   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3891   // Check the unsigned ImmArg.
3892   if (!isUInt<N>(CImm->getZExtValue())) {
3893     DAG.getContext()->emitError(Node->getOperationName(0) +
3894                                 ": argument out of range.");
3895     return DAG.getNode(ISD::UNDEF, DL, ResTy);
3896   }
3897 
3898   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3899   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3900   return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3901 }
3902 
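     // Map chain-free LSX/LASX intrinsics onto generic ISD opcodes (or
     // LoongArchISD nodes) so that ordinary DAG combines and selection
     // patterns apply to them. The simplest case, as a sketch:
     //   (intrinsic loongarch_lsx_vadd_b $a, $b) --> (add v16i8:$a, v16i8:$b)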
3903 static SDValue
3904 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3905                                  TargetLowering::DAGCombinerInfo &DCI,
3906                                  const LoongArchSubtarget &Subtarget) {
3907   SDLoc DL(N);
3908   switch (N->getConstantOperandVal(0)) {
3909   default:
3910     break;
3911   case Intrinsic::loongarch_lsx_vadd_b:
3912   case Intrinsic::loongarch_lsx_vadd_h:
3913   case Intrinsic::loongarch_lsx_vadd_w:
3914   case Intrinsic::loongarch_lsx_vadd_d:
3915   case Intrinsic::loongarch_lasx_xvadd_b:
3916   case Intrinsic::loongarch_lasx_xvadd_h:
3917   case Intrinsic::loongarch_lasx_xvadd_w:
3918   case Intrinsic::loongarch_lasx_xvadd_d:
3919     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3920                        N->getOperand(2));
3921   case Intrinsic::loongarch_lsx_vaddi_bu:
3922   case Intrinsic::loongarch_lsx_vaddi_hu:
3923   case Intrinsic::loongarch_lsx_vaddi_wu:
3924   case Intrinsic::loongarch_lsx_vaddi_du:
3925   case Intrinsic::loongarch_lasx_xvaddi_bu:
3926   case Intrinsic::loongarch_lasx_xvaddi_hu:
3927   case Intrinsic::loongarch_lasx_xvaddi_wu:
3928   case Intrinsic::loongarch_lasx_xvaddi_du:
3929     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3930                        lowerVectorSplatImm<5>(N, 2, DAG));
3931   case Intrinsic::loongarch_lsx_vsub_b:
3932   case Intrinsic::loongarch_lsx_vsub_h:
3933   case Intrinsic::loongarch_lsx_vsub_w:
3934   case Intrinsic::loongarch_lsx_vsub_d:
3935   case Intrinsic::loongarch_lasx_xvsub_b:
3936   case Intrinsic::loongarch_lasx_xvsub_h:
3937   case Intrinsic::loongarch_lasx_xvsub_w:
3938   case Intrinsic::loongarch_lasx_xvsub_d:
3939     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3940                        N->getOperand(2));
3941   case Intrinsic::loongarch_lsx_vsubi_bu:
3942   case Intrinsic::loongarch_lsx_vsubi_hu:
3943   case Intrinsic::loongarch_lsx_vsubi_wu:
3944   case Intrinsic::loongarch_lsx_vsubi_du:
3945   case Intrinsic::loongarch_lasx_xvsubi_bu:
3946   case Intrinsic::loongarch_lasx_xvsubi_hu:
3947   case Intrinsic::loongarch_lasx_xvsubi_wu:
3948   case Intrinsic::loongarch_lasx_xvsubi_du:
3949     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3950                        lowerVectorSplatImm<5>(N, 2, DAG));
3951   case Intrinsic::loongarch_lsx_vneg_b:
3952   case Intrinsic::loongarch_lsx_vneg_h:
3953   case Intrinsic::loongarch_lsx_vneg_w:
3954   case Intrinsic::loongarch_lsx_vneg_d:
3955   case Intrinsic::loongarch_lasx_xvneg_b:
3956   case Intrinsic::loongarch_lasx_xvneg_h:
3957   case Intrinsic::loongarch_lasx_xvneg_w:
3958   case Intrinsic::loongarch_lasx_xvneg_d:
3959     return DAG.getNode(ISD::SUB, DL, N->getValueType(0),
3960                        DAG.getConstant(0, DL, N->getValueType(0)),
3961                        N->getOperand(1));
3966   case Intrinsic::loongarch_lsx_vmax_b:
3967   case Intrinsic::loongarch_lsx_vmax_h:
3968   case Intrinsic::loongarch_lsx_vmax_w:
3969   case Intrinsic::loongarch_lsx_vmax_d:
3970   case Intrinsic::loongarch_lasx_xvmax_b:
3971   case Intrinsic::loongarch_lasx_xvmax_h:
3972   case Intrinsic::loongarch_lasx_xvmax_w:
3973   case Intrinsic::loongarch_lasx_xvmax_d:
3974     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3975                        N->getOperand(2));
3976   case Intrinsic::loongarch_lsx_vmax_bu:
3977   case Intrinsic::loongarch_lsx_vmax_hu:
3978   case Intrinsic::loongarch_lsx_vmax_wu:
3979   case Intrinsic::loongarch_lsx_vmax_du:
3980   case Intrinsic::loongarch_lasx_xvmax_bu:
3981   case Intrinsic::loongarch_lasx_xvmax_hu:
3982   case Intrinsic::loongarch_lasx_xvmax_wu:
3983   case Intrinsic::loongarch_lasx_xvmax_du:
3984     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3985                        N->getOperand(2));
3986   case Intrinsic::loongarch_lsx_vmaxi_b:
3987   case Intrinsic::loongarch_lsx_vmaxi_h:
3988   case Intrinsic::loongarch_lsx_vmaxi_w:
3989   case Intrinsic::loongarch_lsx_vmaxi_d:
3990   case Intrinsic::loongarch_lasx_xvmaxi_b:
3991   case Intrinsic::loongarch_lasx_xvmaxi_h:
3992   case Intrinsic::loongarch_lasx_xvmaxi_w:
3993   case Intrinsic::loongarch_lasx_xvmaxi_d:
3994     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3995                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3996   case Intrinsic::loongarch_lsx_vmaxi_bu:
3997   case Intrinsic::loongarch_lsx_vmaxi_hu:
3998   case Intrinsic::loongarch_lsx_vmaxi_wu:
3999   case Intrinsic::loongarch_lsx_vmaxi_du:
4000   case Intrinsic::loongarch_lasx_xvmaxi_bu:
4001   case Intrinsic::loongarch_lasx_xvmaxi_hu:
4002   case Intrinsic::loongarch_lasx_xvmaxi_wu:
4003   case Intrinsic::loongarch_lasx_xvmaxi_du:
4004     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
4005                        lowerVectorSplatImm<5>(N, 2, DAG));
4006   case Intrinsic::loongarch_lsx_vmin_b:
4007   case Intrinsic::loongarch_lsx_vmin_h:
4008   case Intrinsic::loongarch_lsx_vmin_w:
4009   case Intrinsic::loongarch_lsx_vmin_d:
4010   case Intrinsic::loongarch_lasx_xvmin_b:
4011   case Intrinsic::loongarch_lasx_xvmin_h:
4012   case Intrinsic::loongarch_lasx_xvmin_w:
4013   case Intrinsic::loongarch_lasx_xvmin_d:
4014     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
4015                        N->getOperand(2));
4016   case Intrinsic::loongarch_lsx_vmin_bu:
4017   case Intrinsic::loongarch_lsx_vmin_hu:
4018   case Intrinsic::loongarch_lsx_vmin_wu:
4019   case Intrinsic::loongarch_lsx_vmin_du:
4020   case Intrinsic::loongarch_lasx_xvmin_bu:
4021   case Intrinsic::loongarch_lasx_xvmin_hu:
4022   case Intrinsic::loongarch_lasx_xvmin_wu:
4023   case Intrinsic::loongarch_lasx_xvmin_du:
4024     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
4025                        N->getOperand(2));
4026   case Intrinsic::loongarch_lsx_vmini_b:
4027   case Intrinsic::loongarch_lsx_vmini_h:
4028   case Intrinsic::loongarch_lsx_vmini_w:
4029   case Intrinsic::loongarch_lsx_vmini_d:
4030   case Intrinsic::loongarch_lasx_xvmini_b:
4031   case Intrinsic::loongarch_lasx_xvmini_h:
4032   case Intrinsic::loongarch_lasx_xvmini_w:
4033   case Intrinsic::loongarch_lasx_xvmini_d:
4034     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
4035                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
4036   case Intrinsic::loongarch_lsx_vmini_bu:
4037   case Intrinsic::loongarch_lsx_vmini_hu:
4038   case Intrinsic::loongarch_lsx_vmini_wu:
4039   case Intrinsic::loongarch_lsx_vmini_du:
4040   case Intrinsic::loongarch_lasx_xvmini_bu:
4041   case Intrinsic::loongarch_lasx_xvmini_hu:
4042   case Intrinsic::loongarch_lasx_xvmini_wu:
4043   case Intrinsic::loongarch_lasx_xvmini_du:
4044     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
4045                        lowerVectorSplatImm<5>(N, 2, DAG));
4046   case Intrinsic::loongarch_lsx_vmul_b:
4047   case Intrinsic::loongarch_lsx_vmul_h:
4048   case Intrinsic::loongarch_lsx_vmul_w:
4049   case Intrinsic::loongarch_lsx_vmul_d:
4050   case Intrinsic::loongarch_lasx_xvmul_b:
4051   case Intrinsic::loongarch_lasx_xvmul_h:
4052   case Intrinsic::loongarch_lasx_xvmul_w:
4053   case Intrinsic::loongarch_lasx_xvmul_d:
4054     return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
4055                        N->getOperand(2));
4056   case Intrinsic::loongarch_lsx_vmadd_b:
4057   case Intrinsic::loongarch_lsx_vmadd_h:
4058   case Intrinsic::loongarch_lsx_vmadd_w:
4059   case Intrinsic::loongarch_lsx_vmadd_d:
4060   case Intrinsic::loongarch_lasx_xvmadd_b:
4061   case Intrinsic::loongarch_lasx_xvmadd_h:
4062   case Intrinsic::loongarch_lasx_xvmadd_w:
4063   case Intrinsic::loongarch_lasx_xvmadd_d: {
4064     EVT ResTy = N->getValueType(0);
4065     return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
4066                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4067                                    N->getOperand(3)));
4068   }
4069   case Intrinsic::loongarch_lsx_vmsub_b:
4070   case Intrinsic::loongarch_lsx_vmsub_h:
4071   case Intrinsic::loongarch_lsx_vmsub_w:
4072   case Intrinsic::loongarch_lsx_vmsub_d:
4073   case Intrinsic::loongarch_lasx_xvmsub_b:
4074   case Intrinsic::loongarch_lasx_xvmsub_h:
4075   case Intrinsic::loongarch_lasx_xvmsub_w:
4076   case Intrinsic::loongarch_lasx_xvmsub_d: {
4077     EVT ResTy = N->getValueType(0);
4078     return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4079                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4080                                    N->getOperand(3)));
4081   }
4082   case Intrinsic::loongarch_lsx_vdiv_b:
4083   case Intrinsic::loongarch_lsx_vdiv_h:
4084   case Intrinsic::loongarch_lsx_vdiv_w:
4085   case Intrinsic::loongarch_lsx_vdiv_d:
4086   case Intrinsic::loongarch_lasx_xvdiv_b:
4087   case Intrinsic::loongarch_lasx_xvdiv_h:
4088   case Intrinsic::loongarch_lasx_xvdiv_w:
4089   case Intrinsic::loongarch_lasx_xvdiv_d:
4090     return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4091                        N->getOperand(2));
4092   case Intrinsic::loongarch_lsx_vdiv_bu:
4093   case Intrinsic::loongarch_lsx_vdiv_hu:
4094   case Intrinsic::loongarch_lsx_vdiv_wu:
4095   case Intrinsic::loongarch_lsx_vdiv_du:
4096   case Intrinsic::loongarch_lasx_xvdiv_bu:
4097   case Intrinsic::loongarch_lasx_xvdiv_hu:
4098   case Intrinsic::loongarch_lasx_xvdiv_wu:
4099   case Intrinsic::loongarch_lasx_xvdiv_du:
4100     return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4101                        N->getOperand(2));
4102   case Intrinsic::loongarch_lsx_vmod_b:
4103   case Intrinsic::loongarch_lsx_vmod_h:
4104   case Intrinsic::loongarch_lsx_vmod_w:
4105   case Intrinsic::loongarch_lsx_vmod_d:
4106   case Intrinsic::loongarch_lasx_xvmod_b:
4107   case Intrinsic::loongarch_lasx_xvmod_h:
4108   case Intrinsic::loongarch_lasx_xvmod_w:
4109   case Intrinsic::loongarch_lasx_xvmod_d:
4110     return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4111                        N->getOperand(2));
4112   case Intrinsic::loongarch_lsx_vmod_bu:
4113   case Intrinsic::loongarch_lsx_vmod_hu:
4114   case Intrinsic::loongarch_lsx_vmod_wu:
4115   case Intrinsic::loongarch_lsx_vmod_du:
4116   case Intrinsic::loongarch_lasx_xvmod_bu:
4117   case Intrinsic::loongarch_lasx_xvmod_hu:
4118   case Intrinsic::loongarch_lasx_xvmod_wu:
4119   case Intrinsic::loongarch_lasx_xvmod_du:
4120     return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4121                        N->getOperand(2));
4122   case Intrinsic::loongarch_lsx_vand_v:
4123   case Intrinsic::loongarch_lasx_xvand_v:
4124     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4125                        N->getOperand(2));
4126   case Intrinsic::loongarch_lsx_vor_v:
4127   case Intrinsic::loongarch_lasx_xvor_v:
4128     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4129                        N->getOperand(2));
4130   case Intrinsic::loongarch_lsx_vxor_v:
4131   case Intrinsic::loongarch_lasx_xvxor_v:
4132     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4133                        N->getOperand(2));
4134   case Intrinsic::loongarch_lsx_vnor_v:
4135   case Intrinsic::loongarch_lasx_xvnor_v: {
4136     SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4137                               N->getOperand(2));
4138     return DAG.getNOT(DL, Res, Res->getValueType(0));
4139   }
4140   case Intrinsic::loongarch_lsx_vandi_b:
4141   case Intrinsic::loongarch_lasx_xvandi_b:
4142     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4143                        lowerVectorSplatImm<8>(N, 2, DAG));
4144   case Intrinsic::loongarch_lsx_vori_b:
4145   case Intrinsic::loongarch_lasx_xvori_b:
4146     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4147                        lowerVectorSplatImm<8>(N, 2, DAG));
4148   case Intrinsic::loongarch_lsx_vxori_b:
4149   case Intrinsic::loongarch_lasx_xvxori_b:
4150     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4151                        lowerVectorSplatImm<8>(N, 2, DAG));
4152   case Intrinsic::loongarch_lsx_vsll_b:
4153   case Intrinsic::loongarch_lsx_vsll_h:
4154   case Intrinsic::loongarch_lsx_vsll_w:
4155   case Intrinsic::loongarch_lsx_vsll_d:
4156   case Intrinsic::loongarch_lasx_xvsll_b:
4157   case Intrinsic::loongarch_lasx_xvsll_h:
4158   case Intrinsic::loongarch_lasx_xvsll_w:
4159   case Intrinsic::loongarch_lasx_xvsll_d:
4160     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4161                        truncateVecElts(N, DAG));
4162   case Intrinsic::loongarch_lsx_vslli_b:
4163   case Intrinsic::loongarch_lasx_xvslli_b:
4164     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4165                        lowerVectorSplatImm<3>(N, 2, DAG));
4166   case Intrinsic::loongarch_lsx_vslli_h:
4167   case Intrinsic::loongarch_lasx_xvslli_h:
4168     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4169                        lowerVectorSplatImm<4>(N, 2, DAG));
4170   case Intrinsic::loongarch_lsx_vslli_w:
4171   case Intrinsic::loongarch_lasx_xvslli_w:
4172     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4173                        lowerVectorSplatImm<5>(N, 2, DAG));
4174   case Intrinsic::loongarch_lsx_vslli_d:
4175   case Intrinsic::loongarch_lasx_xvslli_d:
4176     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4177                        lowerVectorSplatImm<6>(N, 2, DAG));
4178   case Intrinsic::loongarch_lsx_vsrl_b:
4179   case Intrinsic::loongarch_lsx_vsrl_h:
4180   case Intrinsic::loongarch_lsx_vsrl_w:
4181   case Intrinsic::loongarch_lsx_vsrl_d:
4182   case Intrinsic::loongarch_lasx_xvsrl_b:
4183   case Intrinsic::loongarch_lasx_xvsrl_h:
4184   case Intrinsic::loongarch_lasx_xvsrl_w:
4185   case Intrinsic::loongarch_lasx_xvsrl_d:
4186     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4187                        truncateVecElts(N, DAG));
4188   case Intrinsic::loongarch_lsx_vsrli_b:
4189   case Intrinsic::loongarch_lasx_xvsrli_b:
4190     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4191                        lowerVectorSplatImm<3>(N, 2, DAG));
4192   case Intrinsic::loongarch_lsx_vsrli_h:
4193   case Intrinsic::loongarch_lasx_xvsrli_h:
4194     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4195                        lowerVectorSplatImm<4>(N, 2, DAG));
4196   case Intrinsic::loongarch_lsx_vsrli_w:
4197   case Intrinsic::loongarch_lasx_xvsrli_w:
4198     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4199                        lowerVectorSplatImm<5>(N, 2, DAG));
4200   case Intrinsic::loongarch_lsx_vsrli_d:
4201   case Intrinsic::loongarch_lasx_xvsrli_d:
4202     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4203                        lowerVectorSplatImm<6>(N, 2, DAG));
4204   case Intrinsic::loongarch_lsx_vsra_b:
4205   case Intrinsic::loongarch_lsx_vsra_h:
4206   case Intrinsic::loongarch_lsx_vsra_w:
4207   case Intrinsic::loongarch_lsx_vsra_d:
4208   case Intrinsic::loongarch_lasx_xvsra_b:
4209   case Intrinsic::loongarch_lasx_xvsra_h:
4210   case Intrinsic::loongarch_lasx_xvsra_w:
4211   case Intrinsic::loongarch_lasx_xvsra_d:
4212     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4213                        truncateVecElts(N, DAG));
4214   case Intrinsic::loongarch_lsx_vsrai_b:
4215   case Intrinsic::loongarch_lasx_xvsrai_b:
4216     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4217                        lowerVectorSplatImm<3>(N, 2, DAG));
4218   case Intrinsic::loongarch_lsx_vsrai_h:
4219   case Intrinsic::loongarch_lasx_xvsrai_h:
4220     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4221                        lowerVectorSplatImm<4>(N, 2, DAG));
4222   case Intrinsic::loongarch_lsx_vsrai_w:
4223   case Intrinsic::loongarch_lasx_xvsrai_w:
4224     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4225                        lowerVectorSplatImm<5>(N, 2, DAG));
4226   case Intrinsic::loongarch_lsx_vsrai_d:
4227   case Intrinsic::loongarch_lasx_xvsrai_d:
4228     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4229                        lowerVectorSplatImm<6>(N, 2, DAG));
4230   case Intrinsic::loongarch_lsx_vclz_b:
4231   case Intrinsic::loongarch_lsx_vclz_h:
4232   case Intrinsic::loongarch_lsx_vclz_w:
4233   case Intrinsic::loongarch_lsx_vclz_d:
4234   case Intrinsic::loongarch_lasx_xvclz_b:
4235   case Intrinsic::loongarch_lasx_xvclz_h:
4236   case Intrinsic::loongarch_lasx_xvclz_w:
4237   case Intrinsic::loongarch_lasx_xvclz_d:
4238     return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4239   case Intrinsic::loongarch_lsx_vpcnt_b:
4240   case Intrinsic::loongarch_lsx_vpcnt_h:
4241   case Intrinsic::loongarch_lsx_vpcnt_w:
4242   case Intrinsic::loongarch_lsx_vpcnt_d:
4243   case Intrinsic::loongarch_lasx_xvpcnt_b:
4244   case Intrinsic::loongarch_lasx_xvpcnt_h:
4245   case Intrinsic::loongarch_lasx_xvpcnt_w:
4246   case Intrinsic::loongarch_lasx_xvpcnt_d:
4247     return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4248   case Intrinsic::loongarch_lsx_vbitclr_b:
4249   case Intrinsic::loongarch_lsx_vbitclr_h:
4250   case Intrinsic::loongarch_lsx_vbitclr_w:
4251   case Intrinsic::loongarch_lsx_vbitclr_d:
4252   case Intrinsic::loongarch_lasx_xvbitclr_b:
4253   case Intrinsic::loongarch_lasx_xvbitclr_h:
4254   case Intrinsic::loongarch_lasx_xvbitclr_w:
4255   case Intrinsic::loongarch_lasx_xvbitclr_d:
4256     return lowerVectorBitClear(N, DAG);
4257   case Intrinsic::loongarch_lsx_vbitclri_b:
4258   case Intrinsic::loongarch_lasx_xvbitclri_b:
4259     return lowerVectorBitClearImm<3>(N, DAG);
4260   case Intrinsic::loongarch_lsx_vbitclri_h:
4261   case Intrinsic::loongarch_lasx_xvbitclri_h:
4262     return lowerVectorBitClearImm<4>(N, DAG);
4263   case Intrinsic::loongarch_lsx_vbitclri_w:
4264   case Intrinsic::loongarch_lasx_xvbitclri_w:
4265     return lowerVectorBitClearImm<5>(N, DAG);
4266   case Intrinsic::loongarch_lsx_vbitclri_d:
4267   case Intrinsic::loongarch_lasx_xvbitclri_d:
4268     return lowerVectorBitClearImm<6>(N, DAG);
4269   case Intrinsic::loongarch_lsx_vbitset_b:
4270   case Intrinsic::loongarch_lsx_vbitset_h:
4271   case Intrinsic::loongarch_lsx_vbitset_w:
4272   case Intrinsic::loongarch_lsx_vbitset_d:
4273   case Intrinsic::loongarch_lasx_xvbitset_b:
4274   case Intrinsic::loongarch_lasx_xvbitset_h:
4275   case Intrinsic::loongarch_lasx_xvbitset_w:
4276   case Intrinsic::loongarch_lasx_xvbitset_d: {
4277     EVT VecTy = N->getValueType(0);
4278     SDValue One = DAG.getConstant(1, DL, VecTy);
4279     return DAG.getNode(
4280         ISD::OR, DL, VecTy, N->getOperand(1),
4281         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4282   }
4283   case Intrinsic::loongarch_lsx_vbitseti_b:
4284   case Intrinsic::loongarch_lasx_xvbitseti_b:
4285     return lowerVectorBitSetImm<3>(N, DAG);
4286   case Intrinsic::loongarch_lsx_vbitseti_h:
4287   case Intrinsic::loongarch_lasx_xvbitseti_h:
4288     return lowerVectorBitSetImm<4>(N, DAG);
4289   case Intrinsic::loongarch_lsx_vbitseti_w:
4290   case Intrinsic::loongarch_lasx_xvbitseti_w:
4291     return lowerVectorBitSetImm<5>(N, DAG);
4292   case Intrinsic::loongarch_lsx_vbitseti_d:
4293   case Intrinsic::loongarch_lasx_xvbitseti_d:
4294     return lowerVectorBitSetImm<6>(N, DAG);
4295   case Intrinsic::loongarch_lsx_vbitrev_b:
4296   case Intrinsic::loongarch_lsx_vbitrev_h:
4297   case Intrinsic::loongarch_lsx_vbitrev_w:
4298   case Intrinsic::loongarch_lsx_vbitrev_d:
4299   case Intrinsic::loongarch_lasx_xvbitrev_b:
4300   case Intrinsic::loongarch_lasx_xvbitrev_h:
4301   case Intrinsic::loongarch_lasx_xvbitrev_w:
4302   case Intrinsic::loongarch_lasx_xvbitrev_d: {
4303     EVT VecTy = N->getValueType(0);
4304     SDValue One = DAG.getConstant(1, DL, VecTy);
4305     return DAG.getNode(
4306         ISD::XOR, DL, VecTy, N->getOperand(1),
4307         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4308   }
4309   case Intrinsic::loongarch_lsx_vbitrevi_b:
4310   case Intrinsic::loongarch_lasx_xvbitrevi_b:
4311     return lowerVectorBitRevImm<3>(N, DAG);
4312   case Intrinsic::loongarch_lsx_vbitrevi_h:
4313   case Intrinsic::loongarch_lasx_xvbitrevi_h:
4314     return lowerVectorBitRevImm<4>(N, DAG);
4315   case Intrinsic::loongarch_lsx_vbitrevi_w:
4316   case Intrinsic::loongarch_lasx_xvbitrevi_w:
4317     return lowerVectorBitRevImm<5>(N, DAG);
4318   case Intrinsic::loongarch_lsx_vbitrevi_d:
4319   case Intrinsic::loongarch_lasx_xvbitrevi_d:
4320     return lowerVectorBitRevImm<6>(N, DAG);
4321   case Intrinsic::loongarch_lsx_vfadd_s:
4322   case Intrinsic::loongarch_lsx_vfadd_d:
4323   case Intrinsic::loongarch_lasx_xvfadd_s:
4324   case Intrinsic::loongarch_lasx_xvfadd_d:
4325     return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4326                        N->getOperand(2));
4327   case Intrinsic::loongarch_lsx_vfsub_s:
4328   case Intrinsic::loongarch_lsx_vfsub_d:
4329   case Intrinsic::loongarch_lasx_xvfsub_s:
4330   case Intrinsic::loongarch_lasx_xvfsub_d:
4331     return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4332                        N->getOperand(2));
4333   case Intrinsic::loongarch_lsx_vfmul_s:
4334   case Intrinsic::loongarch_lsx_vfmul_d:
4335   case Intrinsic::loongarch_lasx_xvfmul_s:
4336   case Intrinsic::loongarch_lasx_xvfmul_d:
4337     return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4338                        N->getOperand(2));
4339   case Intrinsic::loongarch_lsx_vfdiv_s:
4340   case Intrinsic::loongarch_lsx_vfdiv_d:
4341   case Intrinsic::loongarch_lasx_xvfdiv_s:
4342   case Intrinsic::loongarch_lasx_xvfdiv_d:
4343     return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4344                        N->getOperand(2));
4345   case Intrinsic::loongarch_lsx_vfmadd_s:
4346   case Intrinsic::loongarch_lsx_vfmadd_d:
4347   case Intrinsic::loongarch_lasx_xvfmadd_s:
4348   case Intrinsic::loongarch_lasx_xvfmadd_d:
4349     return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4350                        N->getOperand(2), N->getOperand(3));
4351   case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4352     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4353                        N->getOperand(1), N->getOperand(2),
4354                        legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4355   case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4356   case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4357     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4358                        N->getOperand(1), N->getOperand(2),
4359                        legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4360   case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4361   case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4362     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4363                        N->getOperand(1), N->getOperand(2),
4364                        legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4365   case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4366     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4367                        N->getOperand(1), N->getOperand(2),
4368                        legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4369   case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4370   case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4371   case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4372   case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4373   case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4374   case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4375   case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4376   case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
4377     return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
4378                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4379                                    N->getOperand(1)));
4380   case Intrinsic::loongarch_lsx_vreplve_b:
4381   case Intrinsic::loongarch_lsx_vreplve_h:
4382   case Intrinsic::loongarch_lsx_vreplve_w:
4383   case Intrinsic::loongarch_lsx_vreplve_d:
4384   case Intrinsic::loongarch_lasx_xvreplve_b:
4385   case Intrinsic::loongarch_lasx_xvreplve_h:
4386   case Intrinsic::loongarch_lasx_xvreplve_w:
4387   case Intrinsic::loongarch_lasx_xvreplve_d:
4388     return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4389                        N->getOperand(1),
4390                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4391                                    N->getOperand(2)));
4392   }
4393   return SDValue();
4394 }
4395 
4396 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4397                                                    DAGCombinerInfo &DCI) const {
4398   SelectionDAG &DAG = DCI.DAG;
4399   switch (N->getOpcode()) {
4400   default:
4401     break;
4402   case ISD::AND:
4403     return performANDCombine(N, DAG, DCI, Subtarget);
4404   case ISD::OR:
4405     return performORCombine(N, DAG, DCI, Subtarget);
4406   case ISD::SETCC:
4407     return performSETCCCombine(N, DAG, DCI, Subtarget);
4408   case ISD::SRL:
4409     return performSRLCombine(N, DAG, DCI, Subtarget);
4410   case LoongArchISD::BITREV_W:
4411     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4412   case ISD::INTRINSIC_WO_CHAIN:
4413     return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4414   }
4415   return SDValue();
4416 }
4417 
4418 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4419                                               MachineBasicBlock *MBB) {
4420   if (!ZeroDivCheck)
4421     return MBB;
4422 
4423   // Build instructions:
4424   // MBB:
4425   //   div (or mod)  $dst, $dividend, $divisor
4426   //   bnez          $divisor, SinkMBB
4427   // BreakMBB:
4428   //   break         7 // BRK_DIVZERO
4429   // SinkMBB:
4430   //   fallthrough
4431   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4432   MachineFunction::iterator It = ++MBB->getIterator();
4433   MachineFunction *MF = MBB->getParent();
4434   auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4435   auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4436   MF->insert(It, BreakMBB);
4437   MF->insert(It, SinkMBB);
4438 
4439   // Transfer the remainder of MBB and its successor edges to SinkMBB.
4440   SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4441   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4442 
4443   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4444   DebugLoc DL = MI.getDebugLoc();
4445   MachineOperand &Divisor = MI.getOperand(2);
4446   Register DivisorReg = Divisor.getReg();
4447 
4448   // MBB:
4449   BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4450       .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4451       .addMBB(SinkMBB);
4452   MBB->addSuccessor(BreakMBB);
4453   MBB->addSuccessor(SinkMBB);
4454 
4455   // BreakMBB:
4456   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4457   // definition of BRK_DIVZERO.
4458   BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4459   BreakMBB->addSuccessor(SinkMBB);
4460 
4461   // Clear Divisor's kill flag.
4462   Divisor.setIsKill(false);
4463 
4464   return SinkMBB;
4465 }
4466 
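     // Expand a vector cond-branch pseudo into an explicit diamond that
     // materializes the i1 result in a GPR:
     //
     //   BB:      FCC = <CondOpc> $src
     //            bcnez FCC, TrueBB
     //   FalseBB: RD1 = addi.w $r0, 0
     //            b SinkBB
     //   TrueBB:  RD2 = addi.w $r0, 1
     //   SinkBB:  $dst = PHI [RD1, FalseBB], [RD2, TrueBB]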
4467 static MachineBasicBlock *
4468 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4469                         const LoongArchSubtarget &Subtarget) {
4470   unsigned CondOpc;
4471   switch (MI.getOpcode()) {
4472   default:
4473     llvm_unreachable("Unexpected opcode");
4474   case LoongArch::PseudoVBZ:
4475     CondOpc = LoongArch::VSETEQZ_V;
4476     break;
4477   case LoongArch::PseudoVBZ_B:
4478     CondOpc = LoongArch::VSETANYEQZ_B;
4479     break;
4480   case LoongArch::PseudoVBZ_H:
4481     CondOpc = LoongArch::VSETANYEQZ_H;
4482     break;
4483   case LoongArch::PseudoVBZ_W:
4484     CondOpc = LoongArch::VSETANYEQZ_W;
4485     break;
4486   case LoongArch::PseudoVBZ_D:
4487     CondOpc = LoongArch::VSETANYEQZ_D;
4488     break;
4489   case LoongArch::PseudoVBNZ:
4490     CondOpc = LoongArch::VSETNEZ_V;
4491     break;
4492   case LoongArch::PseudoVBNZ_B:
4493     CondOpc = LoongArch::VSETALLNEZ_B;
4494     break;
4495   case LoongArch::PseudoVBNZ_H:
4496     CondOpc = LoongArch::VSETALLNEZ_H;
4497     break;
4498   case LoongArch::PseudoVBNZ_W:
4499     CondOpc = LoongArch::VSETALLNEZ_W;
4500     break;
4501   case LoongArch::PseudoVBNZ_D:
4502     CondOpc = LoongArch::VSETALLNEZ_D;
4503     break;
4504   case LoongArch::PseudoXVBZ:
4505     CondOpc = LoongArch::XVSETEQZ_V;
4506     break;
4507   case LoongArch::PseudoXVBZ_B:
4508     CondOpc = LoongArch::XVSETANYEQZ_B;
4509     break;
4510   case LoongArch::PseudoXVBZ_H:
4511     CondOpc = LoongArch::XVSETANYEQZ_H;
4512     break;
4513   case LoongArch::PseudoXVBZ_W:
4514     CondOpc = LoongArch::XVSETANYEQZ_W;
4515     break;
4516   case LoongArch::PseudoXVBZ_D:
4517     CondOpc = LoongArch::XVSETANYEQZ_D;
4518     break;
4519   case LoongArch::PseudoXVBNZ:
4520     CondOpc = LoongArch::XVSETNEZ_V;
4521     break;
4522   case LoongArch::PseudoXVBNZ_B:
4523     CondOpc = LoongArch::XVSETALLNEZ_B;
4524     break;
4525   case LoongArch::PseudoXVBNZ_H:
4526     CondOpc = LoongArch::XVSETALLNEZ_H;
4527     break;
4528   case LoongArch::PseudoXVBNZ_W:
4529     CondOpc = LoongArch::XVSETALLNEZ_W;
4530     break;
4531   case LoongArch::PseudoXVBNZ_D:
4532     CondOpc = LoongArch::XVSETALLNEZ_D;
4533     break;
4534   }
4535 
4536   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4537   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4538   DebugLoc DL = MI.getDebugLoc();
4539   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4540   MachineFunction::iterator It = ++BB->getIterator();
4541 
4542   MachineFunction *F = BB->getParent();
4543   MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4544   MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4545   MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4546 
4547   F->insert(It, FalseBB);
4548   F->insert(It, TrueBB);
4549   F->insert(It, SinkBB);
4550 
4551   // Transfer the remainder of BB and its successor edges to SinkBB.
4552   SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4553   SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4554 
4555   // Insert the vector-condition-set instruction into BB.
4556   Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4557   BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4558 
4559   // Insert branch.
4560   BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4561   BB->addSuccessor(FalseBB);
4562   BB->addSuccessor(TrueBB);
4563 
4564   // FalseBB.
4565   Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4566   BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4567       .addReg(LoongArch::R0)
4568       .addImm(0);
4569   BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4570   FalseBB->addSuccessor(SinkBB);
4571 
4572   // TrueBB.
4573   Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4574   BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4575       .addReg(LoongArch::R0)
4576       .addImm(1);
4577   TrueBB->addSuccessor(SinkBB);
4578 
4579   // SinkBB: merge the results.
4580   BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4581           MI.getOperand(0).getReg())
4582       .addReg(RD1)
4583       .addMBB(FalseBB)
4584       .addReg(RD2)
4585       .addMBB(TrueBB);
4586 
4587   // The pseudo instruction is gone now.
4588   MI.eraseFromParent();
4589   return SinkBB;
4590 }
4591 
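     // LASX provides no 256-bit insert for byte/halfword elements, so expand
     // through the 128-bit LSX insert: if Idx addresses the high half, move
     // that half down with XVPERMI_Q, insert via VINSGR2VR_{B,H} into the
     // 128-bit subregister, then recombine with the untouched half of XSrc.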
4592 static MachineBasicBlock *
4593 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4594                      const LoongArchSubtarget &Subtarget) {
4595   unsigned InsOp;
4596   unsigned HalfSize;
4597   switch (MI.getOpcode()) {
4598   default:
4599     llvm_unreachable("Unexpected opcode");
4600   case LoongArch::PseudoXVINSGR2VR_B:
4601     HalfSize = 16;
4602     InsOp = LoongArch::VINSGR2VR_B;
4603     break;
4604   case LoongArch::PseudoXVINSGR2VR_H:
4605     HalfSize = 8;
4606     InsOp = LoongArch::VINSGR2VR_H;
4607     break;
4608   }
4609   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4610   const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4611   const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4612   DebugLoc DL = MI.getDebugLoc();
4613   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4614   // XDst = vector_insert XSrc, Elt, Idx
4615   Register XDst = MI.getOperand(0).getReg();
4616   Register XSrc = MI.getOperand(1).getReg();
4617   Register Elt = MI.getOperand(2).getReg();
4618   unsigned Idx = MI.getOperand(3).getImm();
4619 
4620   Register ScratchReg1 = XSrc;
4621   if (Idx >= HalfSize) {
4622     ScratchReg1 = MRI.createVirtualRegister(RC);
4623     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4624         .addReg(XSrc)
4625         .addReg(XSrc)
4626         .addImm(1);
4627   }
4628 
4629   Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4630   Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4631   BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4632       .addReg(ScratchReg1, 0, LoongArch::sub_128);
4633   BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4634       .addReg(ScratchSubReg1)
4635       .addReg(Elt)
4636       .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4637 
4638   Register ScratchReg2 = XDst;
4639   if (Idx >= HalfSize)
4640     ScratchReg2 = MRI.createVirtualRegister(RC);
4641 
4642   BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4643       .addImm(0)
4644       .addReg(ScratchSubReg2)
4645       .addImm(LoongArch::sub_128);
4646 
4647   if (Idx >= HalfSize)
4648     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4649         .addReg(XSrc)
4650         .addReg(ScratchReg2)
4651         .addImm(2);
4652 
4653   MI.eraseFromParent();
4654   return BB;
4655 }
4656 
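     // Expand a scalar CTPOP through the LSX population-count instruction:
     // materialize a zero vector, insert the GPR into element 0, run VPCNT_W/D
     // on it, and extract element 0 back into the destination GPR.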
4657 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
4658                                           MachineBasicBlock *BB,
4659                                           const LoongArchSubtarget &Subtarget) {
4660   assert(Subtarget.hasExtLSX() && "PseudoCTPOP expansion requires LSX");
4661   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4662   const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
4663   DebugLoc DL = MI.getDebugLoc();
4664   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4665   Register Dst = MI.getOperand(0).getReg();
4666   Register Src = MI.getOperand(1).getReg();
4667   Register ScratchReg1 = MRI.createVirtualRegister(RC);
4668   Register ScratchReg2 = MRI.createVirtualRegister(RC);
4669   Register ScratchReg3 = MRI.createVirtualRegister(RC);
4670 
4671   BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
4672   BuildMI(*BB, MI, DL,
4673           TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
4674                                        : LoongArch::VINSGR2VR_W),
4675           ScratchReg2)
4676       .addReg(ScratchReg1)
4677       .addReg(Src)
4678       .addImm(0);
4679   BuildMI(
4680       *BB, MI, DL,
4681       TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
4682       ScratchReg3)
4683       .addReg(ScratchReg2);
4684   BuildMI(*BB, MI, DL,
4685           TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
4686                                        : LoongArch::VPICKVE2GR_W),
4687           Dst)
4688       .addReg(ScratchReg3)
4689       .addImm(0);
4690 
4691   MI.eraseFromParent();
4692   return BB;
4693 }
4694 
4695 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4696     MachineInstr &MI, MachineBasicBlock *BB) const {
4697   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4698   DebugLoc DL = MI.getDebugLoc();
4699 
4700   switch (MI.getOpcode()) {
4701   default:
4702     llvm_unreachable("Unexpected instr type to insert");
4703   case LoongArch::DIV_W:
4704   case LoongArch::DIV_WU:
4705   case LoongArch::MOD_W:
4706   case LoongArch::MOD_WU:
4707   case LoongArch::DIV_D:
4708   case LoongArch::DIV_DU:
4709   case LoongArch::MOD_D:
4710   case LoongArch::MOD_DU:
4711     return insertDivByZeroTrap(MI, BB);
4713   case LoongArch::WRFCSR: {
4714     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4715             LoongArch::FCSR0 + MI.getOperand(0).getImm())
4716         .addReg(MI.getOperand(1).getReg());
4717     MI.eraseFromParent();
4718     return BB;
4719   }
4720   case LoongArch::RDFCSR: {
4721     MachineInstr *ReadFCSR =
4722         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4723                 MI.getOperand(0).getReg())
4724             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4725     ReadFCSR->getOperand(1).setIsUndef();
4726     MI.eraseFromParent();
4727     return BB;
4728   }
4729   case LoongArch::PseudoVBZ:
4730   case LoongArch::PseudoVBZ_B:
4731   case LoongArch::PseudoVBZ_H:
4732   case LoongArch::PseudoVBZ_W:
4733   case LoongArch::PseudoVBZ_D:
4734   case LoongArch::PseudoVBNZ:
4735   case LoongArch::PseudoVBNZ_B:
4736   case LoongArch::PseudoVBNZ_H:
4737   case LoongArch::PseudoVBNZ_W:
4738   case LoongArch::PseudoVBNZ_D:
4739   case LoongArch::PseudoXVBZ:
4740   case LoongArch::PseudoXVBZ_B:
4741   case LoongArch::PseudoXVBZ_H:
4742   case LoongArch::PseudoXVBZ_W:
4743   case LoongArch::PseudoXVBZ_D:
4744   case LoongArch::PseudoXVBNZ:
4745   case LoongArch::PseudoXVBNZ_B:
4746   case LoongArch::PseudoXVBNZ_H:
4747   case LoongArch::PseudoXVBNZ_W:
4748   case LoongArch::PseudoXVBNZ_D:
4749     return emitVecCondBranchPseudo(MI, BB, Subtarget);
4750   case LoongArch::PseudoXVINSGR2VR_B:
4751   case LoongArch::PseudoXVINSGR2VR_H:
4752     return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4753   case LoongArch::PseudoCTPOP:
4754     return emitPseudoCTPOP(MI, BB, Subtarget);
4755   case TargetOpcode::STATEPOINT:
4756     // STATEPOINT is a pseudo instruction that has no implicit defs/uses,
4757     // while the BL call instruction (to which the statepoint is lowered in
4758     // the end) has an implicit def of R1. This def is early-clobber as it
4759     // is set at the moment of the call, earlier than any use is read.
4760     // Add this implicit dead def here as a workaround.
4761     MI.addOperand(*MI.getMF(),
4762                   MachineOperand::CreateReg(
4763                       LoongArch::R1, /*isDef*/ true,
4764                       /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
4765                       /*isUndef*/ false, /*isEarlyClobber*/ true));
4766     if (!Subtarget.is64Bit())
4767       report_fatal_error("STATEPOINT is only supported on 64-bit targets");
4768     return emitPatchPoint(MI, BB);
4769   }
4770 }
4771 
4772 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4773     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4774     unsigned *Fast) const {
4775   if (!Subtarget.hasUAL())
4776     return false;
4777 
4778   // TODO: set reasonable speed number.
4779   if (Fast)
4780     *Fast = 1;
4781   return true;
4782 }
4783 
4784 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4785   switch ((LoongArchISD::NodeType)Opcode) {
4786   case LoongArchISD::FIRST_NUMBER:
4787     break;
4788 
4789 #define NODE_NAME_CASE(node)                                                   \
4790   case LoongArchISD::node:                                                     \
4791     return "LoongArchISD::" #node;
4792 
4793     // TODO: Add more target-dependent nodes later.
4794     NODE_NAME_CASE(CALL)
4795     NODE_NAME_CASE(CALL_MEDIUM)
4796     NODE_NAME_CASE(CALL_LARGE)
4797     NODE_NAME_CASE(RET)
4798     NODE_NAME_CASE(TAIL)
4799     NODE_NAME_CASE(TAIL_MEDIUM)
4800     NODE_NAME_CASE(TAIL_LARGE)
4801     NODE_NAME_CASE(SLL_W)
4802     NODE_NAME_CASE(SRA_W)
4803     NODE_NAME_CASE(SRL_W)
4804     NODE_NAME_CASE(BSTRINS)
4805     NODE_NAME_CASE(BSTRPICK)
4806     NODE_NAME_CASE(MOVGR2FR_W_LA64)
4807     NODE_NAME_CASE(MOVFR2GR_S_LA64)
4808     NODE_NAME_CASE(FTINT)
4809     NODE_NAME_CASE(REVB_2H)
4810     NODE_NAME_CASE(REVB_2W)
4811     NODE_NAME_CASE(BITREV_4B)
4812     NODE_NAME_CASE(BITREV_8B)
4813     NODE_NAME_CASE(BITREV_W)
4814     NODE_NAME_CASE(ROTR_W)
4815     NODE_NAME_CASE(ROTL_W)
4816     NODE_NAME_CASE(DIV_W)
4817     NODE_NAME_CASE(DIV_WU)
4818     NODE_NAME_CASE(MOD_W)
4819     NODE_NAME_CASE(MOD_WU)
4820     NODE_NAME_CASE(CLZ_W)
4821     NODE_NAME_CASE(CTZ_W)
4822     NODE_NAME_CASE(DBAR)
4823     NODE_NAME_CASE(IBAR)
4824     NODE_NAME_CASE(BREAK)
4825     NODE_NAME_CASE(SYSCALL)
4826     NODE_NAME_CASE(CRC_W_B_W)
4827     NODE_NAME_CASE(CRC_W_H_W)
4828     NODE_NAME_CASE(CRC_W_W_W)
4829     NODE_NAME_CASE(CRC_W_D_W)
4830     NODE_NAME_CASE(CRCC_W_B_W)
4831     NODE_NAME_CASE(CRCC_W_H_W)
4832     NODE_NAME_CASE(CRCC_W_W_W)
4833     NODE_NAME_CASE(CRCC_W_D_W)
4834     NODE_NAME_CASE(CSRRD)
4835     NODE_NAME_CASE(CSRWR)
4836     NODE_NAME_CASE(CSRXCHG)
4837     NODE_NAME_CASE(IOCSRRD_B)
4838     NODE_NAME_CASE(IOCSRRD_H)
4839     NODE_NAME_CASE(IOCSRRD_W)
4840     NODE_NAME_CASE(IOCSRRD_D)
4841     NODE_NAME_CASE(IOCSRWR_B)
4842     NODE_NAME_CASE(IOCSRWR_H)
4843     NODE_NAME_CASE(IOCSRWR_W)
4844     NODE_NAME_CASE(IOCSRWR_D)
4845     NODE_NAME_CASE(CPUCFG)
4846     NODE_NAME_CASE(MOVGR2FCSR)
4847     NODE_NAME_CASE(MOVFCSR2GR)
4848     NODE_NAME_CASE(CACOP_D)
4849     NODE_NAME_CASE(CACOP_W)
4850     NODE_NAME_CASE(VSHUF)
4851     NODE_NAME_CASE(VPICKEV)
4852     NODE_NAME_CASE(VPICKOD)
4853     NODE_NAME_CASE(VPACKEV)
4854     NODE_NAME_CASE(VPACKOD)
4855     NODE_NAME_CASE(VILVL)
4856     NODE_NAME_CASE(VILVH)
4857     NODE_NAME_CASE(VSHUF4I)
4858     NODE_NAME_CASE(VREPLVEI)
4859     NODE_NAME_CASE(VREPLGR2VR)
4860     NODE_NAME_CASE(XVPERMI)
4861     NODE_NAME_CASE(VPICK_SEXT_ELT)
4862     NODE_NAME_CASE(VPICK_ZEXT_ELT)
4863     NODE_NAME_CASE(VREPLVE)
4864     NODE_NAME_CASE(VALL_ZERO)
4865     NODE_NAME_CASE(VANY_ZERO)
4866     NODE_NAME_CASE(VALL_NONZERO)
4867     NODE_NAME_CASE(VANY_NONZERO)
4868     NODE_NAME_CASE(FRECIPE)
4869     NODE_NAME_CASE(FRSQRTE)
4870   }
4871 #undef NODE_NAME_CASE
4872   return nullptr;
4873 }
4874 
4875 //===----------------------------------------------------------------------===//
4876 //                     Calling Convention Implementation
4877 //===----------------------------------------------------------------------===//
4878 
4879 // Eight general-purpose registers a0-a7 are used for passing integer
4880 // arguments, with a0-a1 reused for return values. Generally, the GPRs are
4881 // used to pass fixed-point arguments, and floating-point arguments when no
4882 // FPR is available or with the soft-float ABI.
4883 const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
4884                              LoongArch::R7,  LoongArch::R8, LoongArch::R9,
4885                              LoongArch::R10, LoongArch::R11};
4886 // Eight floating-point registers fa0-fa7 are used for passing
4887 // floating-point arguments, and fa0-fa1 are also used to return values.
4888 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4889                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
4890                                LoongArch::F6, LoongArch::F7};
4891 // FPR32 and FPR64 alias each other.
4892 const MCPhysReg ArgFPR64s[] = {
4893     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4894     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4895 
4896 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4897                             LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4898                             LoongArch::VR6, LoongArch::VR7};
4899 
4900 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4901                             LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4902                             LoongArch::XR6, LoongArch::XR7};
4903 
4904 // Pass a 2*GRLen argument that has been split into two GRLen values through
4905 // registers or the stack as necessary.
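     // For example, an i128 on LA64 (or an i64 on LA32) reaches this point as
     // two GRLen-sized halves that are assigned as a pair.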
4906 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4907                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4908                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4909                                      ISD::ArgFlagsTy ArgFlags2) {
4910   unsigned GRLenInBytes = GRLen / 8;
4911   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4912     // At least one half can be passed via register.
4913     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4914                                      VA1.getLocVT(), CCValAssign::Full));
4915   } else {
4916     // Both halves must be passed on the stack, with proper alignment.
4917     Align StackAlign =
4918         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4919     State.addLoc(
4920         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4921                             State.AllocateStack(GRLenInBytes, StackAlign),
4922                             VA1.getLocVT(), CCValAssign::Full));
4923     State.addLoc(CCValAssign::getMem(
4924         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4925         LocVT2, CCValAssign::Full));
4926     return false;
4927   }
4928   if (Register Reg = State.AllocateReg(ArgGPRs)) {
4929     // The second half can also be passed via register.
4930     State.addLoc(
4931         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4932   } else {
4933     // The second half is passed via the stack, without additional alignment.
4934     State.addLoc(CCValAssign::getMem(
4935         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4936         LocVT2, CCValAssign::Full));
4937   }
4938   return false;
4939 }
4940 
4941 // Implements the LoongArch calling convention. Returns true upon failure.
4942 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4943                          unsigned ValNo, MVT ValVT,
4944                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4945                          CCState &State, bool IsFixed, bool IsRet,
4946                          Type *OrigTy) {
4947   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4948   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4949   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4950   MVT LocVT = ValVT;
4951 
4952   // Any return value split into more than two values can't be returned
4953   // directly.
4954   if (IsRet && ValNo > 1)
4955     return true;
4956 
4957   // Use GPRs for floating point for variadic arguments or when no FPR is free.
4958   bool UseGPRForFloat = true;
4959 
4960   switch (ABI) {
4961   default:
4962     llvm_unreachable("Unexpected ABI");
4963     break;
4964   case LoongArchABI::ABI_ILP32F:
4965   case LoongArchABI::ABI_LP64F:
4966   case LoongArchABI::ABI_ILP32D:
4967   case LoongArchABI::ABI_LP64D:
4968     UseGPRForFloat = !IsFixed;
4969     break;
4970   case LoongArchABI::ABI_ILP32S:
4971   case LoongArchABI::ABI_LP64S:
4972     break;
4973   }
4974 
4975   // FPR32 and FPR64 alias each other, so checking ArgFPR32s covers both.
4976   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4977     UseGPRForFloat = true;
4978 
4979   if (UseGPRForFloat && ValVT == MVT::f32) {
4980     LocVT = GRLenVT;
4981     LocInfo = CCValAssign::BCvt;
4982   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4983     LocVT = MVT::i64;
4984     LocInfo = CCValAssign::BCvt;
4985   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4986     // TODO: Handle passing f64 on LA32 with D feature.
4987     report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4988   }
4989 
4990   // If this is a variadic argument, the LoongArch calling convention requires
4991   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4992   // byte alignment. An aligned register should be used regardless of whether
4993   // the original argument was split during legalisation or not. The argument
4994   // will not be passed by registers if the original type is larger than
4995   // 2*GRLen, so the register alignment rule does not apply.
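     // For example, on LA64 a variadic argument with 16-byte alignment (such
     // as a split i128) must start at an even index into ArgGPRs, pairing the
     // registers as (a0,a1), (a2,a3), and so on.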
4996   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4997   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4998       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4999     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
5000     // Skip 'odd' register if necessary.
5001     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
5002       State.AllocateReg(ArgGPRs);
5003   }
5004 
5005   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
5006   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
5007       State.getPendingArgFlags();
5008 
5009   assert(PendingLocs.size() == PendingArgFlags.size() &&
5010          "PendingLocs and PendingArgFlags out of sync");
5011 
5012   // Split arguments might be passed indirectly, so keep track of the pending
5013   // values.
5014   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
5015     LocVT = GRLenVT;
5016     LocInfo = CCValAssign::Indirect;
5017     PendingLocs.push_back(
5018         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
5019     PendingArgFlags.push_back(ArgFlags);
5020     if (!ArgFlags.isSplitEnd()) {
5021       return false;
5022     }
5023   }
5024 
5025   // If the split argument only had two elements, it should be passed directly
5026   // in registers or on the stack.
5027   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
5028       PendingLocs.size() <= 2) {
5029     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
5030     // Apply the normal calling convention rules to the first half of the
5031     // split argument.
5032     CCValAssign VA = PendingLocs[0];
5033     ISD::ArgFlagsTy AF = PendingArgFlags[0];
5034     PendingLocs.clear();
5035     PendingArgFlags.clear();
5036     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
5037                                     ArgFlags);
5038   }
5039 
5040   // Allocate to a register if possible, or else a stack slot.
5041   Register Reg;
5042   unsigned StoreSizeBytes = GRLen / 8;
5043   Align StackAlign = Align(GRLen / 8);
5044 
5045   if (ValVT == MVT::f32 && !UseGPRForFloat)
5046     Reg = State.AllocateReg(ArgFPR32s);
5047   else if (ValVT == MVT::f64 && !UseGPRForFloat)
5048     Reg = State.AllocateReg(ArgFPR64s);
5049   else if (ValVT.is128BitVector())
5050     Reg = State.AllocateReg(ArgVRs);
5051   else if (ValVT.is256BitVector())
5052     Reg = State.AllocateReg(ArgXRs);
5053   else
5054     Reg = State.AllocateReg(ArgGPRs);
5055 
5056   unsigned StackOffset =
5057       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
5058 
5059   // If we reach this point and PendingLocs is non-empty, we must be at the
5060   // end of a split argument that must be passed indirectly.
5061   if (!PendingLocs.empty()) {
5062     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
5063     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
5064     for (auto &It : PendingLocs) {
5065       if (Reg)
5066         It.convertToReg(Reg);
5067       else
5068         It.convertToMem(StackOffset);
5069       State.addLoc(It);
5070     }
5071     PendingLocs.clear();
5072     PendingArgFlags.clear();
5073     return false;
5074   }
5075   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
5076          "Expected a GRLenVT at this stage");
5077 
5078   if (Reg) {
5079     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5080     return false;
5081   }
5082 
5083   // When a floating-point value is passed on the stack, no bit-cast is needed.
5084   if (ValVT.isFloatingPoint()) {
5085     LocVT = ValVT;
5086     LocInfo = CCValAssign::Full;
5087   }
5088 
5089   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5090   return false;
5091 }
5092 
5093 void LoongArchTargetLowering::analyzeInputArgs(
5094     MachineFunction &MF, CCState &CCInfo,
5095     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
5096     LoongArchCCAssignFn Fn) const {
5097   FunctionType *FType = MF.getFunction().getFunctionType();
5098   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5099     MVT ArgVT = Ins[i].VT;
5100     Type *ArgTy = nullptr;
5101     if (IsRet)
5102       ArgTy = FType->getReturnType();
5103     else if (Ins[i].isOrigArg())
5104       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5105     LoongArchABI::ABI ABI =
5106         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5107     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
5108            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
5109       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
5110                         << '\n');
5111       llvm_unreachable("");
5112     }
5113   }
5114 }
5115 
5116 void LoongArchTargetLowering::analyzeOutputArgs(
5117     MachineFunction &MF, CCState &CCInfo,
5118     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5119     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
5120   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5121     MVT ArgVT = Outs[i].VT;
5122     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5123     LoongArchABI::ABI ABI =
5124         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5125     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
5126            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
5127       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
5128                         << "\n");
5129       llvm_unreachable("");
5130     }
5131   }
5132 }
5133 
5134 // Convert Val to ValVT. Should not be called for CCValAssign::Indirect
5135 // values.
5136 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5137                                    const CCValAssign &VA, const SDLoc &DL) {
5138   switch (VA.getLocInfo()) {
5139   default:
5140     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5141   case CCValAssign::Full:
5142   case CCValAssign::Indirect:
5143     break;
5144   case CCValAssign::BCvt:
5145     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5146       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
5147     else
5148       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5149     break;
5150   }
5151   return Val;
5152 }
5153 
5154 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5155                                 const CCValAssign &VA, const SDLoc &DL,
5156                                 const ISD::InputArg &In,
5157                                 const LoongArchTargetLowering &TLI) {
5158   MachineFunction &MF = DAG.getMachineFunction();
5159   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5160   EVT LocVT = VA.getLocVT();
5161   SDValue Val;
5162   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5163   Register VReg = RegInfo.createVirtualRegister(RC);
5164   RegInfo.addLiveIn(VA.getLocReg(), VReg);
5165   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5166 
5167   // If the input is sign extended from 32 bits or fewer, note it for the OptW pass.
5168   if (In.isOrigArg()) {
5169     Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
5170     if (OrigArg->getType()->isIntegerTy()) {
5171       unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
5172       // An input zero extended from i31 or narrower can also be considered sign extended.
5173       if ((BitWidth <= 32 && In.Flags.isSExt()) ||
5174           (BitWidth < 32 && In.Flags.isZExt())) {
5175         LoongArchMachineFunctionInfo *LAFI =
5176             MF.getInfo<LoongArchMachineFunctionInfo>();
5177         LAFI->addSExt32Register(VReg);
5178       }
5179     }
5180   }
5181 
5182   return convertLocVTToValVT(DAG, Val, VA, DL);
5183 }
5184 
5185 // The caller is responsible for loading the full value if the argument is
5186 // passed with CCValAssign::Indirect.
5187 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5188                                 const CCValAssign &VA, const SDLoc &DL) {
5189   MachineFunction &MF = DAG.getMachineFunction();
5190   MachineFrameInfo &MFI = MF.getFrameInfo();
5191   EVT ValVT = VA.getValVT();
5192   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
5193                                  /*IsImmutable=*/true);
5194   SDValue FIN = DAG.getFrameIndex(
5195       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
5196 
5197   ISD::LoadExtType ExtType;
5198   switch (VA.getLocInfo()) {
5199   default:
5200     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5201   case CCValAssign::Full:
5202   case CCValAssign::Indirect:
5203   case CCValAssign::BCvt:
5204     ExtType = ISD::NON_EXTLOAD;
5205     break;
5206   }
5207   return DAG.getExtLoad(
5208       ExtType, DL, VA.getLocVT(), Chain, FIN,
5209       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5210 }
5211 
5212 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5213                                    const CCValAssign &VA, const SDLoc &DL) {
5214   EVT LocVT = VA.getLocVT();
5215 
5216   switch (VA.getLocInfo()) {
5217   default:
5218     llvm_unreachable("Unexpected CCValAssign::LocInfo");
5219   case CCValAssign::Full:
5220     break;
5221   case CCValAssign::BCvt:
5222     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5223       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
5224     else
5225       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5226     break;
5227   }
5228   return Val;
5229 }
5230 
5231 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5232                              CCValAssign::LocInfo LocInfo,
5233                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
5234   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5235     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5236     //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
5237     static const MCPhysReg GPRList[] = {
5238         LoongArch::R23, LoongArch::R24, LoongArch::R25,
5239         LoongArch::R26, LoongArch::R27, LoongArch::R28,
5240         LoongArch::R29, LoongArch::R30, LoongArch::R31};
5241     if (MCRegister Reg = State.AllocateReg(GPRList)) {
5242       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5243       return false;
5244     }
5245   }
5246 
5247   if (LocVT == MVT::f32) {
5248     // Pass in STG registers: F1, F2, F3, F4
5249     //                        fs0,fs1,fs2,fs3
5250     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5251                                           LoongArch::F26, LoongArch::F27};
5252     if (MCRegister Reg = State.AllocateReg(FPR32List)) {
5253       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5254       return false;
5255     }
5256   }
5257 
5258   if (LocVT == MVT::f64) {
5259     // Pass in STG registers: D1, D2, D3, D4
5260     //                        fs4,fs5,fs6,fs7
5261     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5262                                           LoongArch::F30_64, LoongArch::F31_64};
5263     if (MCRegister Reg = State.AllocateReg(FPR64List)) {
5264       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5265       return false;
5266     }
5267   }
5268 
5269   report_fatal_error("No registers left in GHC calling convention");
5270   return true;
5271 }
5272 
5273 // Transform physical registers into virtual registers.
5274 SDValue LoongArchTargetLowering::LowerFormalArguments(
5275     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5276     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5277     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5278 
5279   MachineFunction &MF = DAG.getMachineFunction();
5280 
5281   switch (CallConv) {
5282   default:
5283     llvm_unreachable("Unsupported calling convention");
5284   case CallingConv::C:
5285   case CallingConv::Fast:
5286     break;
5287   case CallingConv::GHC:
5288     if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5289         !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5290       report_fatal_error(
5291           "GHC calling convention requires the F and D extensions");
5292   }
5293 
5294   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5295   MVT GRLenVT = Subtarget.getGRLenVT();
5296   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5297   // Used with varargs to accumulate store chains.
5298   std::vector<SDValue> OutChains;
5299 
5300   // Assign locations to all of the incoming arguments.
5301   SmallVector<CCValAssign> ArgLocs;
5302   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5303 
5304   if (CallConv == CallingConv::GHC)
5305     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5306   else
5307     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5308 
5309   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5310     CCValAssign &VA = ArgLocs[i];
5311     SDValue ArgValue;
5312     if (VA.isRegLoc())
5313       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5314     else
5315       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5316     if (VA.getLocInfo() == CCValAssign::Indirect) {
5317       // If the original argument was split and passed by reference, we need to
5318       // load all parts of it here (using the same address).
5319       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5320                                    MachinePointerInfo()));
5321       unsigned ArgIndex = Ins[i].OrigArgIndex;
5322       unsigned ArgPartOffset = Ins[i].PartOffset;
5323       assert(ArgPartOffset == 0);
5324       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5325         CCValAssign &PartVA = ArgLocs[i + 1];
5326         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5327         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5328         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5329         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5330                                      MachinePointerInfo()));
5331         ++i;
5332       }
5333       continue;
5334     }
5335     InVals.push_back(ArgValue);
5336   }
5337 
5338   if (IsVarArg) {
5339     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5340     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5341     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5342     MachineFrameInfo &MFI = MF.getFrameInfo();
5343     MachineRegisterInfo &RegInfo = MF.getRegInfo();
5344     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5345 
5346     // Offset of the first variable argument from stack pointer, and size of
5347     // the vararg save area. For now, the varargs save area is either zero or
5348     // large enough to hold a0-a7.
5349     int VaArgOffset, VarArgsSaveSize;
5350 
5351     // If all registers are allocated, then all varargs must be passed on the
5352     // stack and we don't need to save any argregs.
5353     if (ArgRegs.size() == Idx) {
5354       VaArgOffset = CCInfo.getStackSize();
5355       VarArgsSaveSize = 0;
5356     } else {
5357       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5358       VaArgOffset = -VarArgsSaveSize;
5359     }
5360 
5361     // Record the frame index of the first variable argument
5362     // which is needed by VASTART.
5363     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5364     LoongArchFI->setVarArgsFrameIndex(FI);
5365 
5366     // If saving an odd number of registers then create an extra stack slot to
5367     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5368     // offsets to even-numbered registers remain 2*GRLen-aligned.
5369     if (Idx % 2) {
5370       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5371                             true);
5372       VarArgsSaveSize += GRLenInBytes;
5373     }
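    // For example, `int f(int, ...)` on LA64: the fixed argument takes $a0,
    // so Idx == 1 and $a1-$a7 are saved below (7 * 8 bytes); the extra slot
    // above pads this to a VarArgsSaveSize of 64, with the first vararg at
    // offset -56.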
5374 
5375     // Copy the integer registers that may have been used for passing varargs
5376     // to the vararg save area.
5377     for (unsigned I = Idx; I < ArgRegs.size();
5378          ++I, VaArgOffset += GRLenInBytes) {
5379       const Register Reg = RegInfo.createVirtualRegister(RC);
5380       RegInfo.addLiveIn(ArgRegs[I], Reg);
5381       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5382       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5383       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5384       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5385                                    MachinePointerInfo::getFixedStack(MF, FI));
5386       cast<StoreSDNode>(Store.getNode())
5387           ->getMemOperand()
5388           ->setValue((Value *)nullptr);
5389       OutChains.push_back(Store);
5390     }
5391     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5392   }
5393 
5394   // All stores are grouped in one node to allow the matching between
5395   // the size of Ins and InVals. This only happens for vararg functions.
5396   if (!OutChains.empty()) {
5397     OutChains.push_back(Chain);
5398     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5399   }
5400 
5401   return Chain;
5402 }
5403 
5404 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5405   return CI->isTailCall();
5406 }
5407 
5408 // Check if the return value is used only as a return value, as otherwise
5409 // we can't perform a tail-call.
5410 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5411                                                  SDValue &Chain) const {
5412   if (N->getNumValues() != 1)
5413     return false;
5414   if (!N->hasNUsesOfValue(1, 0))
5415     return false;
5416 
5417   SDNode *Copy = *N->user_begin();
5418   if (Copy->getOpcode() != ISD::CopyToReg)
5419     return false;
5420 
5421   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5422   // isn't safe to perform a tail call.
5423   if (Copy->getGluedNode())
5424     return false;
5425 
5426   // The copy must be used by a LoongArchISD::RET, and nothing else.
5427   bool HasRet = false;
5428   for (SDNode *Node : Copy->users()) {
5429     if (Node->getOpcode() != LoongArchISD::RET)
5430       return false;
5431     HasRet = true;
5432   }
5433 
5434   if (!HasRet)
5435     return false;
5436 
5437   Chain = Copy->getOperand(0);
5438   return true;
5439 }
5440 
5441 // Check whether the call is eligible for tail call optimization.
5442 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5443     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5444     const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5445 
5446   auto CalleeCC = CLI.CallConv;
5447   auto &Outs = CLI.Outs;
5448   auto &Caller = MF.getFunction();
5449   auto CallerCC = Caller.getCallingConv();
5450 
5451   // Do not tail call opt if the stack is used to pass parameters.
5452   if (CCInfo.getStackSize() != 0)
5453     return false;
5454 
5455   // Do not tail call opt if any parameters need to be passed indirectly.
5456   for (auto &VA : ArgLocs)
5457     if (VA.getLocInfo() == CCValAssign::Indirect)
5458       return false;
5459 
5460   // Do not tail call opt if either caller or callee uses struct return
5461   // semantics.
5462   auto IsCallerStructRet = Caller.hasStructRetAttr();
5463   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5464   if (IsCallerStructRet || IsCalleeStructRet)
5465     return false;
5466 
5467   // Do not tail call opt if either the callee or caller has a byval argument.
5468   for (auto &Arg : Outs)
5469     if (Arg.Flags.isByVal())
5470       return false;
5471 
5472   // The callee has to preserve all registers the caller needs to preserve.
5473   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5474   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5475   if (CalleeCC != CallerCC) {
5476     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5477     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5478       return false;
5479   }
5480   return true;
5481 }
5482 
5483 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5484   return DAG.getDataLayout().getPrefTypeAlign(
5485       VT.getTypeForEVT(*DAG.getContext()));
5486 }
5487 
5488 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5489 // and output parameter nodes.
5490 SDValue
5491 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5492                                    SmallVectorImpl<SDValue> &InVals) const {
5493   SelectionDAG &DAG = CLI.DAG;
5494   SDLoc &DL = CLI.DL;
5495   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5496   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5497   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5498   SDValue Chain = CLI.Chain;
5499   SDValue Callee = CLI.Callee;
5500   CallingConv::ID CallConv = CLI.CallConv;
5501   bool IsVarArg = CLI.IsVarArg;
5502   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5503   MVT GRLenVT = Subtarget.getGRLenVT();
5504   bool &IsTailCall = CLI.IsTailCall;
5505 
5506   MachineFunction &MF = DAG.getMachineFunction();
5507 
5508   // Analyze the operands of the call, assigning locations to each operand.
5509   SmallVector<CCValAssign> ArgLocs;
5510   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5511 
5512   if (CallConv == CallingConv::GHC)
5513     ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5514   else
5515     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5516 
5517   // Check if it's really possible to do a tail call.
5518   if (IsTailCall)
5519     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5520 
5521   if (IsTailCall)
5522     ++NumTailCalls;
5523   else if (CLI.CB && CLI.CB->isMustTailCall())
5524     report_fatal_error("failed to perform tail call elimination on a call "
5525                        "site marked musttail");
5526 
5527   // Get a count of how many bytes are to be pushed on the stack.
5528   unsigned NumBytes = ArgCCInfo.getStackSize();
5529 
5530   // Create local copies for byval args.
5531   SmallVector<SDValue> ByValArgs;
5532   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5533     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5534     if (!Flags.isByVal())
5535       continue;
5536 
5537     SDValue Arg = OutVals[i];
5538     unsigned Size = Flags.getByValSize();
5539     Align Alignment = Flags.getNonZeroByValAlign();
5540 
5541     int FI =
5542         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5543     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5544     SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5545 
5546     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5547                           /*IsVolatile=*/false,
5548                           /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5549                           MachinePointerInfo(), MachinePointerInfo());
5550     ByValArgs.push_back(FIPtr);
5551   }
5552 
5553   if (!IsTailCall)
5554     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5555 
5556   // Copy argument values to their designated locations.
5557   SmallVector<std::pair<Register, SDValue>> RegsToPass;
5558   SmallVector<SDValue> MemOpChains;
5559   SDValue StackPtr;
5560   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5561     CCValAssign &VA = ArgLocs[i];
5562     SDValue ArgValue = OutVals[i];
5563     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5564 
5565     // Promote the value if needed.
5566     // For now, only handle fully promoted and indirect arguments.
5567     if (VA.getLocInfo() == CCValAssign::Indirect) {
5568       // Store the argument in a stack slot and pass its address.
5569       Align StackAlign =
5570           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5571                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
5572       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5573       // If the original argument was split and passed by reference, we need to
5574       // store the required parts of it here (and pass just one address).
5575       unsigned ArgIndex = Outs[i].OrigArgIndex;
5576       unsigned ArgPartOffset = Outs[i].PartOffset;
5577       assert(ArgPartOffset == 0);
5578       // Calculate the total size to store. We don't have access to what we're
5579       // actually storing other than performing the loop and collecting the
5580       // info.
5581       SmallVector<std::pair<SDValue, SDValue>> Parts;
5582       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5583         SDValue PartValue = OutVals[i + 1];
5584         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5585         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5586         EVT PartVT = PartValue.getValueType();
5587 
5588         StoredSize += PartVT.getStoreSize();
5589         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5590         Parts.push_back(std::make_pair(PartValue, Offset));
5591         ++i;
5592       }
5593       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5594       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5595       MemOpChains.push_back(
5596           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5597                        MachinePointerInfo::getFixedStack(MF, FI)));
5598       for (const auto &Part : Parts) {
5599         SDValue PartValue = Part.first;
5600         SDValue PartOffset = Part.second;
5601         SDValue Address =
5602             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5603         MemOpChains.push_back(
5604             DAG.getStore(Chain, DL, PartValue, Address,
5605                          MachinePointerInfo::getFixedStack(MF, FI)));
5606       }
5607       ArgValue = SpillSlot;
5608     } else {
5609       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5610     }
5611 
5612     // Use local copy if it is a byval arg.
5613     if (Flags.isByVal())
5614       ArgValue = ByValArgs[j++];
5615 
5616     if (VA.isRegLoc()) {
5617       // Queue up the argument copies and emit them at the end.
5618       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5619     } else {
5620       assert(VA.isMemLoc() && "Argument not register or memory");
5621       assert(!IsTailCall && "Tail call not allowed if stack is used "
5622                             "for passing parameters");
5623 
5624       // Work out the address of the stack slot.
5625       if (!StackPtr.getNode())
5626         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5627       SDValue Address =
5628           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5629                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5630 
5631       // Emit the store.
5632       MemOpChains.push_back(
5633           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5634     }
5635   }
5636 
5637   // Join the stores, which are independent of one another.
5638   if (!MemOpChains.empty())
5639     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5640 
5641   SDValue Glue;
5642 
5643   // Build a sequence of copy-to-reg nodes, chained and glued together.
5644   for (auto &Reg : RegsToPass) {
5645     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5646     Glue = Chain.getValue(1);
5647   }
5648 
5649   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5650   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5651   // split it, so that the direct call can be matched by PseudoCALL.
5652   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5653     const GlobalValue *GV = S->getGlobal();
5654     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5655                            ? LoongArchII::MO_CALL
5656                            : LoongArchII::MO_CALL_PLT;
5657     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5658   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5659     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5660                            ? LoongArchII::MO_CALL
5661                            : LoongArchII::MO_CALL_PLT;
5662     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5663   }
5664 
5665   // The first call operand is the chain and the second is the target address.
5666   SmallVector<SDValue> Ops;
5667   Ops.push_back(Chain);
5668   Ops.push_back(Callee);
5669 
5670   // Add argument registers to the end of the list so that they are
5671   // known live into the call.
5672   for (auto &Reg : RegsToPass)
5673     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5674 
5675   if (!IsTailCall) {
5676     // Add a register mask operand representing the call-preserved registers.
5677     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5678     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5679     assert(Mask && "Missing call preserved mask for calling convention");
5680     Ops.push_back(DAG.getRegisterMask(Mask));
5681   }
5682 
5683   // Glue the call to the argument copies, if any.
5684   if (Glue.getNode())
5685     Ops.push_back(Glue);
5686 
5687   // Emit the call.
5688   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5689   unsigned Op;
5690   switch (DAG.getTarget().getCodeModel()) {
5691   default:
5692     report_fatal_error("Unsupported code model");
5693   case CodeModel::Small:
5694     Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5695     break;
5696   case CodeModel::Medium:
5697     assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5698     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5699     break;
5700   case CodeModel::Large:
5701     assert(Subtarget.is64Bit() && "Large code model requires LA64");
5702     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5703     break;
5704   }
5705 
5706   if (IsTailCall) {
5707     MF.getFrameInfo().setHasTailCall();
5708     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5709     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5710     return Ret;
5711   }
5712 
5713   Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5714   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5715   Glue = Chain.getValue(1);
5716 
5717   // Mark the end of the call, which is glued to the call itself.
5718   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5719   Glue = Chain.getValue(1);
5720 
5721   // Assign locations to each value returned by this call.
5722   SmallVector<CCValAssign> RVLocs;
5723   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5724   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5725 
5726   // Copy all of the result registers out of their specified physreg.
5727   for (auto &VA : RVLocs) {
5728     // Copy the value out.
5729     SDValue RetValue =
5730         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5731     // Glue the RetValue to the end of the call sequence.
5732     Chain = RetValue.getValue(1);
5733     Glue = RetValue.getValue(2);
5734 
5735     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5736 
5737     InVals.push_back(RetValue);
5738   }
5739 
5740   return Chain;
5741 }
5742 
5743 bool LoongArchTargetLowering::CanLowerReturn(
5744     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5745     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
5746     const Type *RetTy) const {
5747   SmallVector<CCValAssign> RVLocs;
5748   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5749 
5750   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5751     LoongArchABI::ABI ABI =
5752         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5753     if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5754                      Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5755                      nullptr))
5756       return false;
5757   }
5758   return true;
5759 }
5760 
5761 SDValue LoongArchTargetLowering::LowerReturn(
5762     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5763     const SmallVectorImpl<ISD::OutputArg> &Outs,
5764     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5765     SelectionDAG &DAG) const {
5766   // Stores the assignment of the return value to a location.
5767   SmallVector<CCValAssign> RVLocs;
5768 
5769   // Info about the registers and stack slot.
5770   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5771                  *DAG.getContext());
5772 
5773   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5774                     nullptr, CC_LoongArch);
5775   if (CallConv == CallingConv::GHC && !RVLocs.empty())
5776     report_fatal_error("GHC functions return void only");
5777   SDValue Glue;
5778   SmallVector<SDValue, 4> RetOps(1, Chain);
5779 
5780   // Copy the result values into the output registers.
5781   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5782     CCValAssign &VA = RVLocs[i];
5783     assert(VA.isRegLoc() && "Can only return in registers!");
5784 
5785     // Handle a 'normal' return.
5786     SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5787     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5788 
5789     // Guarantee that all emitted copies are stuck together.
5790     Glue = Chain.getValue(1);
5791     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5792   }
5793 
5794   RetOps[0] = Chain; // Update chain.
5795 
5796   // Add the glue node if we have it.
5797   if (Glue.getNode())
5798     RetOps.push_back(Glue);
5799 
5800   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5801 }
5802 
5803 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
5804                                                EVT VT) const {
5805   if (!Subtarget.hasExtLSX())
5806     return false;
5807 
5808   if (VT == MVT::f32) {
5809     uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5810     return (masked == 0x3e000000 || masked == 0x40000000);
5811   }
5812 
5813   if (VT == MVT::f64) {
5814     uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5815     return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5816   }
5817 
5818   return false;
5819 }
5820 
5821 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5822                                            bool ForCodeSize) const {
5823   // TODO: Maybe need more checks here after vector extension is supported.
5824   if (VT == MVT::f32 && !Subtarget.hasBasicF())
5825     return false;
5826   if (VT == MVT::f64 && !Subtarget.hasBasicD())
5827     return false;
5828   return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5829 }
5830 
5831 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
5832   return true;
5833 }
5834 
5835 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
5836   return true;
5837 }
5838 
5839 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5840     const Instruction *I) const {
5841   if (!Subtarget.is64Bit())
5842     return isa<LoadInst>(I) || isa<StoreInst>(I);
5843 
5844   if (isa<LoadInst>(I))
5845     return true;
5846 
5847   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5848   // require fences because we can use amswap_db.[w/d].
5849   Type *Ty = I->getOperand(0)->getType();
5850   if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5851     unsigned Size = Ty->getIntegerBitWidth();
5852     return (Size == 8 || Size == 16);
5853   }
5854 
5855   return false;
5856 }
5857 
5858 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5859                                                 LLVMContext &Context,
5860                                                 EVT VT) const {
5861   if (!VT.isVector())
5862     return getPointerTy(DL);
5863   return VT.changeVectorElementTypeToInteger();
5864 }
5865 
5866 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
5867   // TODO: Support vectors.
5868   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5869 }
5870 
5871 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5872                                                  const CallInst &I,
5873                                                  MachineFunction &MF,
5874                                                  unsigned Intrinsic) const {
5875   switch (Intrinsic) {
5876   default:
5877     return false;
5878   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5879   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5880   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5881   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5882     Info.opc = ISD::INTRINSIC_W_CHAIN;
5883     Info.memVT = MVT::i32;
5884     Info.ptrVal = I.getArgOperand(0);
5885     Info.offset = 0;
5886     Info.align = Align(4);
5887     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5888                  MachineMemOperand::MOVolatile;
5889     return true;
5890     // TODO: Add more Intrinsics later.
5891   }
5892 }
5893 
5894 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
5895 // atomicrmw and/or/xor operations with operands narrower than 32 bits can
5896 // no longer be expanded to am{and/or/xor}[_db].w through AtomicExpandPass.
5897 // To prevent a regression, we implement the expansion manually.
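// A rough sketch of the rewrite, shown as illustrative IR (not the exact
// builder output) for an i8 `atomicrmw or` on a little-endian target:
//   %old = atomicrmw or ptr %p, i8 %v <ordering>
// becomes approximately:
//   %aligned = llvm.ptrmask(%p, ~3)          ; word-align the address
//   %shamt   = (ptrtoint %p & 3) * 8         ; bit offset of the byte
//   %wide    = (zext i8 %v to i32) << %shamt
//   %oldw    = atomicrmw or ptr %aligned, i32 %wide <ordering>
//   %old     = trunc i32 (%oldw >> %shamt) to i8
// For `and`, the shifted operand is first OR'ed with the inverted mask so
// that the other bytes of the word are preserved.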
5898 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
5899   AtomicRMWInst::BinOp Op = AI->getOperation();
5900 
5901   assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
5902           Op == AtomicRMWInst::And) &&
5903          "Unable to expand");
5904   unsigned MinWordSize = 4;
5905 
5906   IRBuilder<> Builder(AI);
5907   LLVMContext &Ctx = Builder.getContext();
5908   const DataLayout &DL = AI->getDataLayout();
5909   Type *ValueType = AI->getType();
5910   Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
5911 
5912   Value *Addr = AI->getPointerOperand();
5913   PointerType *PtrTy = cast<PointerType>(Addr->getType());
5914   IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
5915 
5916   Value *AlignedAddr = Builder.CreateIntrinsic(
5917       Intrinsic::ptrmask, {PtrTy, IntTy},
5918       {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
5919       "AlignedAddr");
5920 
5921   Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
5922   Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
5923   Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
5924   ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
5925   Value *Mask = Builder.CreateShl(
5926       ConstantInt::get(WordType,
5927                        (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
5928       ShiftAmt, "Mask");
5929   Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
5930   Value *ValOperand_Shifted =
5931       Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
5932                         ShiftAmt, "ValOperand_Shifted");
5933   Value *NewOperand;
5934   if (Op == AtomicRMWInst::And)
5935     NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
5936   else
5937     NewOperand = ValOperand_Shifted;
5938 
5939   AtomicRMWInst *NewAI =
5940       Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
5941                               AI->getOrdering(), AI->getSyncScopeID());
5942 
5943   Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
5944   Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
5945   Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
5946   AI->replaceAllUsesWith(FinalOldResult);
5947   AI->eraseFromParent();
5948 }
5949 
5950 TargetLowering::AtomicExpansionKind
5951 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5952   // TODO: Add more AtomicRMWInst that needs to be extended.
5953 
5954   // Since floating-point operations require a non-trivial set of data
5955   // operations, use CmpXChg to expand.
5956   if (AI->isFloatingPointOperation() ||
5957       AI->getOperation() == AtomicRMWInst::UIncWrap ||
5958       AI->getOperation() == AtomicRMWInst::UDecWrap ||
5959       AI->getOperation() == AtomicRMWInst::USubCond ||
5960       AI->getOperation() == AtomicRMWInst::USubSat)
5961     return AtomicExpansionKind::CmpXChg;
5962 
5963   if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
5964       (AI->getOperation() == AtomicRMWInst::Xchg ||
5965        AI->getOperation() == AtomicRMWInst::Add ||
5966        AI->getOperation() == AtomicRMWInst::Sub)) {
5967     return AtomicExpansionKind::None;
5968   }
5969 
5970   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5971   if (Subtarget.hasLAMCAS()) {
5972     if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
5973                       AI->getOperation() == AtomicRMWInst::Or ||
5974                       AI->getOperation() == AtomicRMWInst::Xor))
5975       return AtomicExpansionKind::Expand;
5976     if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
5977       return AtomicExpansionKind::CmpXChg;
5978   }
5979 
5980   if (Size == 8 || Size == 16)
5981     return AtomicExpansionKind::MaskedIntrinsic;
5982   return AtomicExpansionKind::None;
5983 }
5984 
5985 static Intrinsic::ID
5986 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5987                                     AtomicRMWInst::BinOp BinOp) {
5988   if (GRLen == 64) {
5989     switch (BinOp) {
5990     default:
5991       llvm_unreachable("Unexpected AtomicRMW BinOp");
5992     case AtomicRMWInst::Xchg:
5993       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5994     case AtomicRMWInst::Add:
5995       return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5996     case AtomicRMWInst::Sub:
5997       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5998     case AtomicRMWInst::Nand:
5999       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
6000     case AtomicRMWInst::UMax:
6001       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
6002     case AtomicRMWInst::UMin:
6003       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
6004     case AtomicRMWInst::Max:
6005       return Intrinsic::loongarch_masked_atomicrmw_max_i64;
6006     case AtomicRMWInst::Min:
6007       return Intrinsic::loongarch_masked_atomicrmw_min_i64;
6008       // TODO: support other AtomicRMWInst.
6009     }
6010   }
6011 
6012   if (GRLen == 32) {
6013     switch (BinOp) {
6014     default:
6015       llvm_unreachable("Unexpected AtomicRMW BinOp");
6016     case AtomicRMWInst::Xchg:
6017       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
6018     case AtomicRMWInst::Add:
6019       return Intrinsic::loongarch_masked_atomicrmw_add_i32;
6020     case AtomicRMWInst::Sub:
6021       return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
6022     case AtomicRMWInst::Nand:
6023       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
6024       // TODO: support other AtomicRMWInst.
6025     }
6026   }
6027 
6028   llvm_unreachable("Unexpected GRLen\n");
6029 }
6030 
6031 TargetLowering::AtomicExpansionKind
6032 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
6033     AtomicCmpXchgInst *CI) const {
6034 
6035   if (Subtarget.hasLAMCAS())
6036     return AtomicExpansionKind::None;
6037 
6038   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
6039   if (Size == 8 || Size == 16)
6040     return AtomicExpansionKind::MaskedIntrinsic;
6041   return AtomicExpansionKind::None;
6042 }
6043 
6044 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
6045     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
6046     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
6047   AtomicOrdering FailOrd = CI->getFailureOrdering();
6048   Value *FailureOrdering =
6049       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
6050 
6051   // TODO: Support cmpxchg on LA32.
6052   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
6053   CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
6054   NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
6055   Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6056   Type *Tys[] = {AlignedAddr->getType()};
6057   Value *Result = Builder.CreateIntrinsic(
6058       CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
6059   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6060   return Result;
6061 }
6062 
6063 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
6064     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
6065     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
6066   // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
6067   // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
6068   // mask, as this produces better code than the LL/SC loop emitted by
6069   // int_loongarch_masked_atomicrmw_xchg.
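  // For example, `atomicrmw xchg ptr %p, i8 0` becomes an `atomicrmw and`
  // of the aligned word with ~Mask, while an all-ones operand becomes an
  // `atomicrmw or` with Mask.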
6070   if (AI->getOperation() == AtomicRMWInst::Xchg &&
6071       isa<ConstantInt>(AI->getValOperand())) {
6072     ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
6073     if (CVal->isZero())
6074       return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
6075                                      Builder.CreateNot(Mask, "Inv_Mask"),
6076                                      AI->getAlign(), Ord);
6077     if (CVal->isMinusOne())
6078       return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
6079                                      AI->getAlign(), Ord);
6080   }
6081 
6082   unsigned GRLen = Subtarget.getGRLen();
6083   Value *Ordering =
6084       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
6085   Type *Tys[] = {AlignedAddr->getType()};
6086   Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
6087       AI->getModule(),
6088       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
6089 
6090   if (GRLen == 64) {
6091     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
6092     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6093     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
6094   }
6095 
6096   Value *Result;
6097 
6098   // Must pass the shift amount needed to sign extend the loaded value prior
6099   // to performing a signed comparison for min/max. ShiftAmt is the number of
6100   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
6101   // is the number of bits to left+right shift the value in order to
6102   // sign-extend.
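  // For example, an i8 at byte offset 1 on LA64: ShiftAmt == 8 and
  // ValWidth == 8, so SextShamt == 64 - 8 - 8 == 48; shifting the loaded
  // word left then arithmetic-right by 48 sign-extends the byte in place.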
6103   if (AI->getOperation() == AtomicRMWInst::Min ||
6104       AI->getOperation() == AtomicRMWInst::Max) {
6105     const DataLayout &DL = AI->getDataLayout();
6106     unsigned ValWidth =
6107         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
6108     Value *SextShamt =
6109         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
6110     Result = Builder.CreateCall(LlwOpScwLoop,
6111                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
6112   } else {
6113     Result =
6114         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
6115   }
6116 
6117   if (GRLen == 64)
6118     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6119   return Result;
6120 }
6121 
6122 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
6123     const MachineFunction &MF, EVT VT) const {
6124   VT = VT.getScalarType();
6125 
6126   if (!VT.isSimple())
6127     return false;
6128 
6129   switch (VT.getSimpleVT().SimpleTy) {
6130   case MVT::f32:
6131   case MVT::f64:
6132     return true;
6133   default:
6134     break;
6135   }
6136 
6137   return false;
6138 }
6139 
6140 Register LoongArchTargetLowering::getExceptionPointerRegister(
6141     const Constant *PersonalityFn) const {
6142   return LoongArch::R4;
6143 }
6144 
6145 Register LoongArchTargetLowering::getExceptionSelectorRegister(
6146     const Constant *PersonalityFn) const {
6147   return LoongArch::R5;
6148 }
6149 
6150 //===----------------------------------------------------------------------===//
6151 // Target Optimization Hooks
6152 //===----------------------------------------------------------------------===//
6153 
6154 static int getEstimateRefinementSteps(EVT VT,
6155                                       const LoongArchSubtarget &Subtarget) {
6156   // The FRECIPE instructions' relative accuracy is 2^-14; one refinement
6157   // step covers float's 23-bit mantissa, two cover double's 52 bits.
6158   int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
6159   return RefinementSteps;
6160 }
6161 
6162 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
6163                                                  SelectionDAG &DAG, int Enabled,
6164                                                  int &RefinementSteps,
6165                                                  bool &UseOneConstNR,
6166                                                  bool Reciprocal) const {
6167   if (Subtarget.hasFrecipe()) {
6168     SDLoc DL(Operand);
6169     EVT VT = Operand.getValueType();
6170 
6171     if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6172         (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6173         (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6174         (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6175         (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6176 
6177       if (RefinementSteps == ReciprocalEstimate::Unspecified)
6178         RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6179 
6180       SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
6181       if (Reciprocal)
6182         Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
6183 
6184       return Estimate;
6185     }
6186   }
6187 
6188   return SDValue();
6189 }
6190 
6191 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
6192                                                   SelectionDAG &DAG,
6193                                                   int Enabled,
6194                                                   int &RefinementSteps) const {
6195   if (Subtarget.hasFrecipe()) {
6196     SDLoc DL(Operand);
6197     EVT VT = Operand.getValueType();
6198 
6199     if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6200         (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6201         (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6202         (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6203         (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6204 
6205       if (RefinementSteps == ReciprocalEstimate::Unspecified)
6206         RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6207 
6208       return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
6209     }
6210   }
6211 
6212   return SDValue();
6213 }
6214 
6215 //===----------------------------------------------------------------------===//
6216 //                           LoongArch Inline Assembly Support
6217 //===----------------------------------------------------------------------===//
6218 
6219 LoongArchTargetLowering::ConstraintType
6220 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
6221   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
6222   //
6223   // 'f':  A floating-point register (if available).
6224   // 'k':  A memory operand whose address is formed by a base register and
6225   //       (optionally scaled) index register.
6226   // 'l':  A signed 16-bit constant.
6227   // 'm':  A memory operand whose address is formed by a base register and
6228   //       offset that is suitable for use in instructions with the same
6229   //       addressing mode as st.w and ld.w.
6230   // 'I':  A signed 12-bit constant (for arithmetic instructions).
6231   // 'J':  Integer zero.
6232   // 'K':  An unsigned 12-bit constant (for logic instructions).
6233   // "ZB": An address that is held in a general-purpose register. The offset is
6234   //       zero.
6235   // "ZC": A memory operand whose address is formed by a base register and
6236   //       offset that is suitable for use in instructions with the same
6237   //       addressing mode as ll.w and sc.w.
6238   if (Constraint.size() == 1) {
6239     switch (Constraint[0]) {
6240     default:
6241       break;
6242     case 'f':
6243       return C_RegisterClass;
6244     case 'l':
6245     case 'I':
6246     case 'J':
6247     case 'K':
6248       return C_Immediate;
6249     case 'k':
6250       return C_Memory;
6251     }
6252   }
6253 
6254   if (Constraint == "ZC" || Constraint == "ZB")
6255     return C_Memory;
6256 
6257   // 'm' is handled here.
6258   return TargetLowering::getConstraintType(Constraint);
6259 }
6260 
6261 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
6262     StringRef ConstraintCode) const {
6263   return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
6264       .Case("k", InlineAsm::ConstraintCode::k)
6265       .Case("ZB", InlineAsm::ConstraintCode::ZB)
6266       .Case("ZC", InlineAsm::ConstraintCode::ZC)
6267       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
6268 }
6269 
6270 std::pair<unsigned, const TargetRegisterClass *>
6271 LoongArchTargetLowering::getRegForInlineAsmConstraint(
6272     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
6273   // First, see if this is a constraint that directly corresponds to a LoongArch
6274   // register class.
6275   if (Constraint.size() == 1) {
6276     switch (Constraint[0]) {
6277     case 'r':
6278       // TODO: Support fixed vectors up to GRLen?
6279       if (VT.isVector())
6280         break;
6281       return std::make_pair(0U, &LoongArch::GPRRegClass);
6282     case 'f':
6283       if (Subtarget.hasBasicF() && VT == MVT::f32)
6284         return std::make_pair(0U, &LoongArch::FPR32RegClass);
6285       if (Subtarget.hasBasicD() && VT == MVT::f64)
6286         return std::make_pair(0U, &LoongArch::FPR64RegClass);
6287       if (Subtarget.hasExtLSX() &&
6288           TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
6289         return std::make_pair(0U, &LoongArch::LSX128RegClass);
6290       if (Subtarget.hasExtLASX() &&
6291           TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
6292         return std::make_pair(0U, &LoongArch::LASX256RegClass);
6293       break;
6294     default:
6295       break;
6296     }
6297   }
6298 
6299   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
6300   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
6301   // constraints while the official register name is prefixed with a '$'. So we
6302   // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
6303   // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
6304   // case insensitive, so no need to convert the constraint to upper case here.
6305   //
6306   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
6307   // decode the usage of register name aliases into their official names. And
6308   // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
6309   // official register names.
6310   if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
6311       Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
6312     bool IsFP = Constraint[2] == 'f';
6313     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
6314     std::pair<unsigned, const TargetRegisterClass *> R;
6315     R = TargetLowering::getRegForInlineAsmConstraint(
6316         TRI, join_items("", Temp.first, Temp.second), VT);
6317     // Match those names to the widest floating point register type available.
6318     if (IsFP) {
6319       unsigned RegNo = R.first;
6320       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
6321         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
6322           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
6323           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
6324         }
6325       }
6326     }
6327     return R;
6328   }
6329 
6330   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6331 }
6332 
6333 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
6334     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
6335     SelectionDAG &DAG) const {
6336   // Currently only support length 1 constraints.
6337   if (Constraint.size() == 1) {
6338     switch (Constraint[0]) {
6339     case 'l':
6340       // Validate & create a 16-bit signed immediate operand.
6341       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6342         uint64_t CVal = C->getSExtValue();
6343         if (isInt<16>(CVal))
6344           Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6345                                                     Subtarget.getGRLenVT()));
6346       }
6347       return;
6348     case 'I':
6349       // Validate & create a 12-bit signed immediate operand.
6350       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6351         uint64_t CVal = C->getSExtValue();
6352         if (isInt<12>(CVal))
6353           Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6354                                                     Subtarget.getGRLenVT()));
6355       }
6356       return;
6357     case 'J':
6358       // Validate & create an integer zero operand.
6359       if (auto *C = dyn_cast<ConstantSDNode>(Op))
6360         if (C->getZExtValue() == 0)
6361           Ops.push_back(
6362               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
6363       return;
6364     case 'K':
6365       // Validate & create a 12-bit unsigned immediate operand.
6366       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6367         uint64_t CVal = C->getZExtValue();
6368         if (isUInt<12>(CVal))
6369           Ops.push_back(
6370               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6371       }
6372       return;
6373     default:
6374       break;
6375     }
6376   }
6377   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6378 }
6379 
6380 #define GET_REGISTER_MATCHER
6381 #include "LoongArchGenAsmMatcher.inc"
6382 
6383 Register
6384 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
6385                                            const MachineFunction &MF) const {
6386   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6387   std::string NewRegName = Name.second.str();
6388   Register Reg = MatchRegisterAltName(NewRegName);
6389   if (Reg == LoongArch::NoRegister)
6390     Reg = MatchRegisterName(NewRegName);
6391   if (Reg == LoongArch::NoRegister)
6392     report_fatal_error(
6393         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6394   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6395   if (!ReservedRegs.test(Reg))
6396     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6397                              StringRef(RegName) + "\"."));
6398   return Reg;
6399 }
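
// Illustrative example (a sketch, not code from this file): this hook backs
// the llvm.read_register/llvm.write_register intrinsics, e.g.
//
//   register void *tp asm("$tp");   // $tp (alias of $r2) is reserved
//
// Both alt names ("$tp") and plain names ("$r2") resolve because
// MatchRegisterAltName is tried before MatchRegisterName; requesting an
// allocatable register such as "$a0" hits the second fatal error above.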
6400 
6401 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
6402                                                      EVT VT, SDValue C) const {
6403   // TODO: Support vectors.
6404   if (!VT.isScalarInteger())
6405     return false;
6406 
6407   // Omit the optimization if the data size exceeds GRLen.
6408   if (VT.getSizeInBits() > Subtarget.getGRLen())
6409     return false;
6410 
6411   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6412     const APInt &Imm = ConstNode->getAPIntValue();
6413     // Break MUL into (SLLI + ADD/SUB) or ALSL.
6414     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6415         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6416       return true;
6417     // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6418     if (ConstNode->hasOneUse() &&
6419         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6420          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6421       return true;
6422     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6423     // in which the immediate has two set bits. Or break (MUL x, imm)
6424     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6425     // equals (1 << s0) - (1 << s1).
6426     if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6427       unsigned Shifts = Imm.countr_zero();
6428       // Reject immediates which can be composed via a single LU12I.W.
6429       if (Shifts >= 12)
6430         return false;
6431       // Reject multiplications which can be optimized to
6432       // (SLLI (ALSL x, x, 1/2/3/4), s).
6433       APInt ImmPop = Imm.ashr(Shifts);
6434       if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6435         return false;
6436       // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6437       // since it needs one more instruction than the other 3 cases.
6438       APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6439       if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6440           (ImmSmall - Imm).isPowerOf2())
6441         return true;
6442     }
6443   }
6444 
6445   return false;
6446 }
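
// Illustrative decompositions (a sketch; actual selection may differ):
//
//   x * 9    ->  alsl.d  $a0, $a0, $a0, 3         // Imm - 1 == 1 << 3
//   x * 6    ->  slli.d  $t0, $a0, 1
//                alsl.d  $a0, $a0, $t0, 2         // Imm - 2 == 1 << 2
//   x * 4100 ->  slli.d  $t0, $a0, 12
//                slli.d  $a0, $a0, 2
//                add.d   $a0, $t0, $a0            // 4100 == (1<<12) + (1<<2)
//
// By contrast, x * 6144 (== 3 << 11) returns false above so that it can be
// selected as (SLLI (ALSL x, x, 1), 11) instead.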
6447 
6448 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
6449                                                     const AddrMode &AM,
6450                                                     Type *Ty, unsigned AS,
6451                                                     Instruction *I) const {
6452   // LoongArch has four basic addressing modes:
6453   //  1. reg
6454   //  2. reg + 12-bit signed offset
6455   //  3. reg + 14-bit signed offset left-shifted by 2
6456   //  4. reg1 + reg2
6457   // TODO: Add more checks after the vector extension is supported.
6458 
6459   // No global is ever allowed as a base.
6460   if (AM.BaseGV)
6461     return false;
6462 
6463   // Require the offset to be a 12-bit signed immediate, or a 14-bit signed
6464   // immediate left-shifted by 2 when the `UAL` feature is available.
6465   if (!isInt<12>(AM.BaseOffs) &&
6466       !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6467     return false;
6468 
6469   switch (AM.Scale) {
6470   case 0:
6471     // "r+i" or just "i", depending on HasBaseReg.
6472     break;
6473   case 1:
6474     // "r+r+i" is not allowed.
6475     if (AM.HasBaseReg && AM.BaseOffs)
6476       return false;
6477     // Otherwise we have "r+r" or "r+i".
6478     break;
6479   case 2:
6480     // "2*r+r" or "2*r+i" is not allowed.
6481     if (AM.HasBaseReg || AM.BaseOffs)
6482       return false;
6483     // Allow "2*r" as "r+r".
6484     break;
6485   default:
6486     return false;
6487   }
6488 
6489   return true;
6490 }
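
// Illustrative encodings of the accepted modes (a sketch, not code from this
// file):
//
//   1. reg                  ld.w    $a0, $a1, 0
//   2. reg + si12           ld.w    $a0, $a1, -2048
//   3. reg + (si14 << 2)    ldptr.w $a0, $a1, 32764   // only with UAL here
//   4. reg1 + reg2          ldx.w   $a0, $a1, $a2
//
// A form like "r+r+i" has no single-instruction encoding, hence the
// rejection under Scale == 1 above.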
6491 
6492 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
6493   return isInt<12>(Imm);
6494 }
6495 
6496 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
6497   return isInt<12>(Imm);
6498 }
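
// Illustrative (a sketch): both hooks mirror the si12 field of addi.{w/d}
// and slti/sltui, e.g.
//
//   addi.d  $a0, $a0, 2047   // legal add immediate
//
// while an addend of 2048 must first be materialized into a register.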
6499 
6500 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6501   // Zexts are free if they can be combined with a load.
6502   // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6503   // poorly with type legalization of compares, which prefers sext.
6504   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6505     EVT MemVT = LD->getMemoryVT();
6506     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6507         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6508          LD->getExtensionType() == ISD::ZEXTLOAD))
6509       return true;
6510   }
6511 
6512   return TargetLowering::isZExtFree(Val, VT2);
6513 }
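
// Illustrative example (a sketch, not code from this file):
//
//   %b = load i8, ptr %p
//   %z = zext i8 %b to i64   ; free: folds into a single ld.bu
//
// An i32 load + zext to i64 is deliberately not free, since LA64 keeps i32
// values sign-extended and the zero extension would cost a bstrpick.d.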
6514 
6515 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6516                                                     EVT DstVT) const {
6517   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6518 }
6519 
6520 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
6521   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6522 }
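
// Illustrative (a sketch): the two hooks above bias LA64 toward sign
// extension, matching 32-bit instructions such as add.w that sign-extend
// their results:
//
//   sext i32 %x to i64   ; usually a no-op
//   zext i32 %x to i64   ; bstrpick.d $a0, $a0, 31, 0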
6523 
6524 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
6525   // TODO: Support vectors.
6526   if (Y.getValueType().isVector())
6527     return false;
6528 
6529   return !isa<ConstantSDNode>(Y);
6530 }
6531 
6532 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
6533   // LAMCAS will use amcas[_db].{b/h/w/d}, which does not require extension.
6534   return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
6535 }
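
// Illustrative example (a sketch, not code from this file): for
//
//   %r = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel monotonic
//
// without LAMCAS, %cmp is sign-extended so it compares equal against the
// sign-extended result of the ll.w/sc.w loop; with LAMCAS the operation can
// select to amcas_db.w, so ANY_EXTEND suffices.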
6536 
6537 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6538     Type *Ty, bool IsSigned) const {
6539   if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
6540     return true;
6541 
6542   return IsSigned;
6543 }
6544 
6545 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6546   // Return false to suppress unnecessary extensions when a libcall argument
6547   // or return value is a float narrower than GRLen on a soft-FP ABI.
6548   if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6549                                   Type.getSizeInBits() < Subtarget.getGRLen()))
6550     return false;
6551   return true;
6552 }
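
// Illustrative (a sketch): on an LA64 soft-FP ABI, an f32 argument to a
// libcall such as __addsf3 stays a 32-bit value in its GPR instead of being
// widened to GRLen first.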
6553 
6554 // memcpy and other memory intrinsics typically try to use wider loads/stores
6555 // when the source/dest is aligned and the copy size is large enough. We
6556 // therefore want to align such objects passed to memory intrinsics.
6557 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
6558                                                      unsigned &MinSize,
6559                                                      Align &PrefAlign) const {
6560   if (!isa<MemIntrinsic>(CI))
6561     return false;
6562 
6563   if (Subtarget.is64Bit()) {
6564     MinSize = 8;
6565     PrefAlign = Align(8);
6566   } else {
6567     MinSize = 4;
6568     PrefAlign = Align(4);
6569   }
6570 
6571   return true;
6572 }
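
// Illustrative example (a sketch, not code from this file): given
//
//   char buf[32];
//   memcpy(buf, src, 32);
//
// this hook asks CodeGenPrepare to raise buf's alignment to 8 on LA64 so the
// expansion can use 64-bit ld.d/st.d pairs instead of byte accesses.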
6573