xref: /llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h (revision bc74a1edbe5e6a3603e65efe06116fa72747acab)
1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/IR/Instruction.h"
23 
24 namespace llvm {
25 
26 namespace AArch64ISD {
27 
28 // For predicated nodes where the result is a vector, the operation is
29 // controlled by a governing predicate, and the inactive lanes are explicitly
30 // defined with a value, please stick to the following naming convention:
31 //
32 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
33 //                        to source operand OP<n>.
34 //
35 //    _MERGE_ZERO         The result value is a vector with inactive lanes
36 //                        actively zeroed.
37 //
38 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
39 //                        to the last source operand, whose only purpose is to
40 //                        serve as a passthru value.
41 //
42 // For other cases where no explicit action is needed to set the inactive lanes,
43 // or when the result is not a vector and it is needed or helpful to
44 // distinguish a node from similar unpredicated nodes, use:
45 //
46 //    _PRED
47 //
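//
// As a purely illustrative reading of this convention (using names from the
// enum below): FADD_PRED is a predicated FADD whose inactive lanes are
// undefined, SRAD_MERGE_OP1 is a predicated shift whose inactive lanes take
// the value of operand 1, and FNEG_MERGE_PASSTHRU is a predicated FNEG whose
// inactive lanes take the value of the trailing passthru operand.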
48 enum NodeType : unsigned {
49   FIRST_NUMBER = ISD::BUILTIN_OP_END,
50   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
51   CALL,         // Function call.
52 
53   // Pseudo for an ObjC call that gets emitted together with a special `mov
54   // x29, x29` marker instruction.
55   CALL_RVMARKER,
56 
57   CALL_BTI, // Function call followed by a BTI instruction.
58 
59   // Function call, authenticating the callee value first:
60   // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
61   AUTH_CALL,
62   // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
63   // operands.
64   AUTH_TC_RETURN,
65 
66   // Authenticated variant of CALL_RVMARKER.
67   AUTH_CALL_RVMARKER,
68 
69   COALESCER_BARRIER,
70 
71   VG_SAVE,
72   VG_RESTORE,
73 
74   SMSTART,
75   SMSTOP,
76   RESTORE_ZA,
77   RESTORE_ZT,
78   SAVE_ZT,
79 
80   // A call with the callee in x16, i.e. "blr x16".
81   CALL_ARM64EC_TO_X64,
82 
83   // Produces the full sequence of instructions for getting the thread pointer
84   // offset of a variable into X0, using the TLSDesc model.
85   TLSDESC_CALLSEQ,
86   TLSDESC_AUTH_CALLSEQ,
87   ADRP,     // Page address of a TargetGlobalAddress operand.
88   ADR,      // ADR
89   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
90   LOADgot,  // Load from automatically generated descriptor (e.g. Global
91             // Offset Table, TLS record).
92   RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
93   BRCOND,   // Conditional branch instruction; "b.cond".
94   CSEL,
95   CSINV, // Conditional select invert.
96   CSNEG, // Conditional select negate.
97   CSINC, // Conditional select increment.
98 
99   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
100   // ELF.
101   THREAD_POINTER,
102   ADC,
103   SBC, // adc, sbc instructions
104 
105   // To avoid stack clash, allocation is performed by block and each block is
106   // probed.
107   PROBED_ALLOCA,
108 
109   // Predicated instructions where inactive lanes produce undefined results.
110   ABDS_PRED,
111   ABDU_PRED,
112   FADD_PRED,
113   FDIV_PRED,
114   FMA_PRED,
115   FMAX_PRED,
116   FMAXNM_PRED,
117   FMIN_PRED,
118   FMINNM_PRED,
119   FMUL_PRED,
120   FSUB_PRED,
121   HADDS_PRED,
122   HADDU_PRED,
123   MUL_PRED,
124   MULHS_PRED,
125   MULHU_PRED,
126   RHADDS_PRED,
127   RHADDU_PRED,
128   SDIV_PRED,
129   SHL_PRED,
130   SMAX_PRED,
131   SMIN_PRED,
132   SRA_PRED,
133   SRL_PRED,
134   UDIV_PRED,
135   UMAX_PRED,
136   UMIN_PRED,
137 
138   // Unpredicated vector instructions
139   BIC,
140 
141   SRAD_MERGE_OP1,
142 
143   // Predicated instructions with the result of inactive lanes provided by the
144   // last operand.
145   FABS_MERGE_PASSTHRU,
146   FCEIL_MERGE_PASSTHRU,
147   FFLOOR_MERGE_PASSTHRU,
148   FNEARBYINT_MERGE_PASSTHRU,
149   FNEG_MERGE_PASSTHRU,
150   FRECPX_MERGE_PASSTHRU,
151   FRINT_MERGE_PASSTHRU,
152   FROUND_MERGE_PASSTHRU,
153   FROUNDEVEN_MERGE_PASSTHRU,
154   FSQRT_MERGE_PASSTHRU,
155   FTRUNC_MERGE_PASSTHRU,
156   FP_ROUND_MERGE_PASSTHRU,
157   FP_EXTEND_MERGE_PASSTHRU,
158   UINT_TO_FP_MERGE_PASSTHRU,
159   SINT_TO_FP_MERGE_PASSTHRU,
160   FCVTX_MERGE_PASSTHRU,
161   FCVTZU_MERGE_PASSTHRU,
162   FCVTZS_MERGE_PASSTHRU,
163   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
164   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
165   ABS_MERGE_PASSTHRU,
166   NEG_MERGE_PASSTHRU,
167 
168   SETCC_MERGE_ZERO,
169 
170   // Arithmetic instructions which write flags.
171   ADDS,
172   SUBS,
173   ADCS,
174   SBCS,
175   ANDS,
176 
177   // Conditional compares. Operands: left,right,falsecc,cc,flags
178   CCMP,
179   CCMN,
180   FCCMP,
181 
182   // Floating point comparison
183   FCMP,
184 
185   // Scalar-to-vector duplication
186   DUP,
187   DUPLANE8,
188   DUPLANE16,
189   DUPLANE32,
190   DUPLANE64,
191   DUPLANE128,
192 
193   // Vector immediate moves
194   MOVI,
195   MOVIshift,
196   MOVIedit,
197   MOVImsl,
198   FMOV,
199   MVNIshift,
200   MVNImsl,
201 
202   // Vector immediate ops
203   BICi,
204   ORRi,
205 
206   // Vector bitwise select: similar to ISD::VSELECT, but the selection happens
207   // per bit, so the mask bits within an element need not all be identical.
208   BSP,
209 
210   // Vector shuffles
211   ZIP1,
212   ZIP2,
213   UZP1,
214   UZP2,
215   TRN1,
216   TRN2,
217   REV16,
218   REV32,
219   REV64,
220   EXT,
221   SPLICE,
222 
223   // Vector shift by scalar
224   VSHL,
225   VLSHR,
226   VASHR,
227 
228   // Vector shift by scalar (again)
229   SQSHL_I,
230   UQSHL_I,
231   SQSHLU_I,
232   SRSHR_I,
233   URSHR_I,
234   URSHR_I_PRED,
235 
236   // Vector narrowing shift by immediate (bottom)
237   RSHRNB_I,
238 
239   // Vector shift by constant and insert
240   VSLI,
241   VSRI,
242 
243   // Vector comparisons
244   CMEQ,
245   CMGE,
246   CMGT,
247   CMHI,
248   CMHS,
249   FCMEQ,
250   FCMGE,
251   FCMGT,
252 
253   // Vector zero comparisons
254   CMEQz,
255   CMGEz,
256   CMGTz,
257   CMLEz,
258   CMLTz,
259   FCMEQz,
260   FCMGEz,
261   FCMGTz,
262   FCMLEz,
263   FCMLTz,
264 
265   // Round wide FP to narrow FP, using round-to-odd for inexact results.
266   FCVTXN,
267 
268   // Vector across-lanes addition
269   // Only the lower result lane is defined.
270   SADDV,
271   UADDV,
272 
273   // Unsigned/signed sum long across vector
274   UADDLV,
275   SADDLV,
276 
277   // Wide adds
278   SADDWT,
279   SADDWB,
280   UADDWT,
281   UADDWB,
282 
283   // Pairwise add of two vectors
284   ADDP,
285   // Add Long Pairwise
286   SADDLP,
287   UADDLP,
288 
289   // udot/sdot/usdot instructions
290   UDOT,
291   SDOT,
292   USDOT,
293 
294   // Vector across-lanes min/max
295   // Only the lower result lane is defined.
296   SMINV,
297   UMINV,
298   SMAXV,
299   UMAXV,
300 
301   SADDV_PRED,
302   UADDV_PRED,
303   SMAXV_PRED,
304   UMAXV_PRED,
305   SMINV_PRED,
306   UMINV_PRED,
307   ORV_PRED,
308   EORV_PRED,
309   ANDV_PRED,
310 
311   // Compare-and-branch
312   CBZ,
313   CBNZ,
314   TBZ,
315   TBNZ,
316 
317   // Tail calls
318   TC_RETURN,
319 
320   // Custom prefetch handling
321   PREFETCH,
322 
323   // {s|u}int to FP within a FP register.
324   SITOF,
325   UITOF,
326 
327   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
328   /// world w.r.t. vectors, which causes additional REV instructions to be
329   /// generated to compensate for the byte-swapping. But sometimes we do
330   /// need to re-interpret the data in SIMD vector registers in big-endian
331   /// mode without emitting such REV instructions.
332   NVCAST,
333 
334   MRS, // MRS, also sets the flags via a glue.
335 
336   SMULL,
337   UMULL,
338 
339   PMULL,
340 
341   // Reciprocal estimates and steps.
342   FRECPE,
343   FRECPS,
344   FRSQRTE,
345   FRSQRTS,
346 
347   SUNPKHI,
348   SUNPKLO,
349   UUNPKHI,
350   UUNPKLO,
351 
352   CLASTA_N,
353   CLASTB_N,
354   LASTA,
355   LASTB,
356   TBL,
357 
358   // Floating-point reductions.
359   FADDA_PRED,
360   FADDV_PRED,
361   FMAXV_PRED,
362   FMAXNMV_PRED,
363   FMINV_PRED,
364   FMINNMV_PRED,
365 
366   INSR,
367   PTEST,
368   PTEST_ANY,
369   PTRUE,
370 
371   CTTZ_ELTS,
372 
373   BITREVERSE_MERGE_PASSTHRU,
374   BSWAP_MERGE_PASSTHRU,
375   REVH_MERGE_PASSTHRU,
376   REVW_MERGE_PASSTHRU,
377   CTLZ_MERGE_PASSTHRU,
378   CTPOP_MERGE_PASSTHRU,
379   DUP_MERGE_PASSTHRU,
380   INDEX_VECTOR,
381 
382   // Cast between vectors of the same element type that differ in length.
383   REINTERPRET_CAST,
384 
385   // Nodes to build an LD64B / ST64B 64-byte quantity out of i64s, and vice versa
386   LS64_BUILD,
387   LS64_EXTRACT,
388 
389   LD1_MERGE_ZERO,
390   LD1S_MERGE_ZERO,
391   LDNF1_MERGE_ZERO,
392   LDNF1S_MERGE_ZERO,
393   LDFF1_MERGE_ZERO,
394   LDFF1S_MERGE_ZERO,
395   LD1RQ_MERGE_ZERO,
396   LD1RO_MERGE_ZERO,
397 
398   // Structured loads.
399   SVE_LD2_MERGE_ZERO,
400   SVE_LD3_MERGE_ZERO,
401   SVE_LD4_MERGE_ZERO,
402 
403   // Unsigned gather loads.
404   GLD1_MERGE_ZERO,
405   GLD1_SCALED_MERGE_ZERO,
406   GLD1_UXTW_MERGE_ZERO,
407   GLD1_SXTW_MERGE_ZERO,
408   GLD1_UXTW_SCALED_MERGE_ZERO,
409   GLD1_SXTW_SCALED_MERGE_ZERO,
410   GLD1_IMM_MERGE_ZERO,
411   GLD1Q_MERGE_ZERO,
412   GLD1Q_INDEX_MERGE_ZERO,
413 
414   // Signed gather loads
415   GLD1S_MERGE_ZERO,
416   GLD1S_SCALED_MERGE_ZERO,
417   GLD1S_UXTW_MERGE_ZERO,
418   GLD1S_SXTW_MERGE_ZERO,
419   GLD1S_UXTW_SCALED_MERGE_ZERO,
420   GLD1S_SXTW_SCALED_MERGE_ZERO,
421   GLD1S_IMM_MERGE_ZERO,
422 
423   // Unsigned first-faulting gather loads.
424   GLDFF1_MERGE_ZERO,
425   GLDFF1_SCALED_MERGE_ZERO,
426   GLDFF1_UXTW_MERGE_ZERO,
427   GLDFF1_SXTW_MERGE_ZERO,
428   GLDFF1_UXTW_SCALED_MERGE_ZERO,
429   GLDFF1_SXTW_SCALED_MERGE_ZERO,
430   GLDFF1_IMM_MERGE_ZERO,
431 
432   // Signed first-faulting gather loads.
433   GLDFF1S_MERGE_ZERO,
434   GLDFF1S_SCALED_MERGE_ZERO,
435   GLDFF1S_UXTW_MERGE_ZERO,
436   GLDFF1S_SXTW_MERGE_ZERO,
437   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
438   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
439   GLDFF1S_IMM_MERGE_ZERO,
440 
441   // Non-temporal gather loads
442   GLDNT1_MERGE_ZERO,
443   GLDNT1_INDEX_MERGE_ZERO,
444   GLDNT1S_MERGE_ZERO,
445 
446   // Contiguous masked store.
447   ST1_PRED,
448 
449   // Scatter store
450   SST1_PRED,
451   SST1_SCALED_PRED,
452   SST1_UXTW_PRED,
453   SST1_SXTW_PRED,
454   SST1_UXTW_SCALED_PRED,
455   SST1_SXTW_SCALED_PRED,
456   SST1_IMM_PRED,
457   SST1Q_PRED,
458   SST1Q_INDEX_PRED,
459 
460   // Non-temporal scatter store
461   SSTNT1_PRED,
462   SSTNT1_INDEX_PRED,
463 
464   // SME
465   RDSVL,
466   REVD_MERGE_PASSTHRU,
467   ALLOCATE_ZA_BUFFER,
468   INIT_TPIDR2OBJ,
469 
470   // Needed for __arm_agnostic("sme_za_state")
471   GET_SME_SAVE_SIZE,
472   ALLOC_SME_SAVE_BUFFER,
473 
474   // Asserts that a function argument (i32) is zero-extended to i8 by
475   // the caller
476   ASSERT_ZEXT_BOOL,
477 
478   // 128-bit system register accesses
479   // lo64, hi64, chain = MRRS(chain, sysregname)
480   MRRS,
481   // chain = MSRR(chain, sysregname, lo64, hi64)
482   MSRR,
483 
484   // Strict (exception-raising) floating point comparison
485   FIRST_STRICTFP_OPCODE,
486   STRICT_FCMP = FIRST_STRICTFP_OPCODE,
487   STRICT_FCMPE,
488   LAST_STRICTFP_OPCODE = STRICT_FCMPE,
489 
490   // NEON Load/Store with post-increment base updates
491   FIRST_MEMORY_OPCODE,
492   LD2post = FIRST_MEMORY_OPCODE,
493   LD3post,
494   LD4post,
495   ST2post,
496   ST3post,
497   ST4post,
498   LD1x2post,
499   LD1x3post,
500   LD1x4post,
501   ST1x2post,
502   ST1x3post,
503   ST1x4post,
504   LD1DUPpost,
505   LD2DUPpost,
506   LD3DUPpost,
507   LD4DUPpost,
508   LD1LANEpost,
509   LD2LANEpost,
510   LD3LANEpost,
511   LD4LANEpost,
512   ST2LANEpost,
513   ST3LANEpost,
514   ST4LANEpost,
515 
516   STG,
517   STZG,
518   ST2G,
519   STZ2G,
520 
521   LDP,
522   LDIAPP,
523   LDNP,
524   STP,
525   STILP,
526   STNP,
527   LAST_MEMORY_OPCODE = STNP,
528 
529   // SME ZA loads and stores
530   SME_ZA_LDR,
531   SME_ZA_STR,
532 };
533 
534 } // end namespace AArch64ISD
535 
536 namespace AArch64 {
537 /// Possible values of the current rounding mode, which is specified in bits
538 /// 23:22 of FPCR.
539 enum Rounding {
540   RN = 0,    // Round to Nearest
541   RP = 1,    // Round towards Plus infinity
542   RM = 2,    // Round towards Minus infinity
543   RZ = 3,    // Round towards Zero
544   rmMask = 3 // Bit mask selecting rounding mode
545 };
546 
547 // Bit position of rounding mode bits in FPCR.
548 const unsigned RoundingBitsPos = 22;
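// A minimal illustrative sketch (not part of this interface) of how these
// definitions compose: the current mode can be read as
//   (FPCR >> RoundingBitsPos) & rmMask
// and a new mode M can be installed, preserving the other FPCR bits, as
//   NewFPCR = (FPCR & ~(uint64_t(rmMask) << RoundingBitsPos)) |
//             (uint64_t(M) << RoundingBitsPos);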
549 
550 // Reserved bits should be preserved when modifying FPCR.
551 const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;
552 
553 // Registers used to pass function arguments.
554 ArrayRef<MCPhysReg> getGPRArgRegs();
555 ArrayRef<MCPhysReg> getFPRArgRegs();
556 
557 /// Maximum allowed number of unprobed bytes above SP at an ABI
558 /// boundary.
559 const unsigned StackProbeMaxUnprobedStack = 1024;
560 
561 /// Maximum number of iterations to unroll for a constant size probing loop.
562 const unsigned StackProbeMaxLoopUnroll = 4;
563 
564 } // namespace AArch64
565 
566 class AArch64Subtarget;
567 
568 class AArch64TargetLowering : public TargetLowering {
569 public:
570   explicit AArch64TargetLowering(const TargetMachine &TM,
571                                  const AArch64Subtarget &STI);
572 
573   /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
574   /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
575   bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
576                            SDValue N1) const override;
577 
578   /// Selects the correct CCAssignFn for a given CallingConvention value.
579   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
580 
581   /// Selects the correct CCAssignFn for a given CallingConvention value.
582   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
583 
584   /// Determine which of the bits specified in Mask are known to be either zero
585   /// or one and return them in the KnownZero/KnownOne bitsets.
586   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
587                                      const APInt &DemandedElts,
588                                      const SelectionDAG &DAG,
589                                      unsigned Depth = 0) const override;
590 
591   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
592                                            const APInt &DemandedElts,
593                                            const SelectionDAG &DAG,
594                                            unsigned Depth) const override;
595 
596   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
597     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
598     // *DAG* representation of pointers will always be 64-bits. They will be
599     // truncated and extended when transferred to memory, but the 64-bit DAG
600     // allows us to use AArch64's addressing modes much more easily.
601     return MVT::getIntegerVT(64);
602   }
603 
604   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
605                                     const APInt &DemandedElts,
606                                     TargetLoweringOpt &TLO) const override;
607 
608   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
609 
610   /// Returns true if the target allows unaligned memory accesses of the
611   /// specified type.
612   bool allowsMisalignedMemoryAccesses(
613       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
614       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
615       unsigned *Fast = nullptr) const override;
616   /// LLT variant.
617   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
618                                       Align Alignment,
619                                       MachineMemOperand::Flags Flags,
620                                       unsigned *Fast = nullptr) const override;
621 
622   /// Provide custom lowering hooks for some operations.
623   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
624 
625   const char *getTargetNodeName(unsigned Opcode) const override;
626 
627   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
628 
629   /// This method returns a target specific FastISel object, or null if the
630   /// target does not support "fast" ISel.
631   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
632                            const TargetLibraryInfo *libInfo) const override;
633 
634   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
635 
636   bool isFPImmLegal(const APFloat &Imm, EVT VT,
637                     bool ForCodeSize) const override;
638 
639   /// Return true if the given shuffle mask can be codegen'd directly, or if it
640   /// should be stack expanded.
641   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
642 
643   /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
644   /// shuffle mask can be codegen'd directly.
645   bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
646 
647   /// Return the ISD::SETCC ValueType.
648   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
649                          EVT VT) const override;
650 
651   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
652 
653   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
654                                   MachineBasicBlock *BB) const;
655 
656   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
657                                          MachineBasicBlock *BB) const;
658 
659   MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
660                                             MachineBasicBlock *MBB) const;
661 
662   MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
663                                   MachineInstr &MI,
664                                   MachineBasicBlock *BB) const;
665   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
666   MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
667                                  MachineInstr &MI, MachineBasicBlock *BB) const;
668   MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
669                                  unsigned Opcode, bool Op0IsDef) const;
670   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
671   MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
672                                           MachineBasicBlock *BB) const;
673   MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
674                                           MachineBasicBlock *BB) const;
675   MachineBasicBlock *EmitAllocateSMESaveBuffer(MachineInstr &MI,
676                                                MachineBasicBlock *BB) const;
677   MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI,
678                                         MachineBasicBlock *BB) const;
679 
680   MachineBasicBlock *
681   EmitInstrWithCustomInserter(MachineInstr &MI,
682                               MachineBasicBlock *MBB) const override;
683 
684   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
685                           MachineFunction &MF,
686                           unsigned Intrinsic) const override;
687 
688   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
689                              EVT NewVT) const override;
690 
691   bool shouldRemoveRedundantExtend(SDValue Op) const override;
692 
693   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
694   bool isTruncateFree(EVT VT1, EVT VT2) const override;
695 
696   bool isProfitableToHoist(Instruction *I) const override;
697 
698   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
699   bool isZExtFree(EVT VT1, EVT VT2) const override;
700   bool isZExtFree(SDValue Val, EVT VT2) const override;
701 
702   bool optimizeExtendOrTruncateConversion(
703       Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;
704 
705   bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
706 
707   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
708 
709   bool lowerInterleavedLoad(LoadInst *LI,
710                             ArrayRef<ShuffleVectorInst *> Shuffles,
711                             ArrayRef<unsigned> Indices,
712                             unsigned Factor) const override;
713   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
714                              unsigned Factor) const override;
715 
716   bool lowerDeinterleaveIntrinsicToLoad(
717       LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
718 
719   bool lowerInterleaveIntrinsicToStore(
720       StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
721 
722   bool isLegalAddImmediate(int64_t) const override;
723   bool isLegalAddScalableImmediate(int64_t) const override;
724   bool isLegalICmpImmediate(int64_t) const override;
725 
726   bool isMulAddWithConstProfitable(SDValue AddNode,
727                                    SDValue ConstNode) const override;
728 
729   bool shouldConsiderGEPOffsetSplit() const override;
730 
731   EVT getOptimalMemOpType(const MemOp &Op,
732                           const AttributeList &FuncAttributes) const override;
733 
734   LLT getOptimalMemOpLLT(const MemOp &Op,
735                          const AttributeList &FuncAttributes) const override;
736 
737   /// Return true if the addressing mode represented by AM is legal for this
738   /// target, for a load/store of the specified type.
739   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
740                              unsigned AS,
741                              Instruction *I = nullptr) const override;
742 
743   int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
744                                          int64_t MaxOffset) const override;
745 
746   /// Return true if an FMA operation is faster than a pair of fmul and fadd
747   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
748   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
749   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
750                                   EVT VT) const override;
751   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
752 
753   bool generateFMAsInMachineCombiner(EVT VT,
754                                      CodeGenOptLevel OptLevel) const override;
755 
756   /// Return true if the target has native support for
757   /// the specified value type and it is 'desirable' to use the type for the
758   /// given node type.
759   bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
760 
761   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
762   ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
763 
764   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
765   bool isDesirableToCommuteWithShift(const SDNode *N,
766                                      CombineLevel Level) const override;
767 
768   bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
769     return false;
770   }
771 
772   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
773   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
774 
775   /// Return true if it is profitable to fold a pair of shifts into a mask.
776   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
777                                          CombineLevel Level) const override;
778 
779   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
780                                             EVT VT) const override;
781 
782   /// Returns true if it is beneficial to convert a load of a constant
783   /// to just the constant itself.
784   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
785                                          Type *Ty) const override;
786 
787   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
788   /// with this index.
789   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
790                                unsigned Index) const override;
791 
792   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
793                             bool MathUsed) const override {
794     // Using overflow ops for overflow checks only should be beneficial on
795     // AArch64.
796     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
797   }
798 
799   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
800                         AtomicOrdering Ord) const override;
801   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
802                               AtomicOrdering Ord) const override;
803 
804   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
805 
806   bool isOpSuitableForLDPSTP(const Instruction *I) const;
807   bool isOpSuitableForLSE128(const Instruction *I) const;
808   bool isOpSuitableForRCPC3(const Instruction *I) const;
809   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
810   bool
811   shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
812 
813   TargetLoweringBase::AtomicExpansionKind
814   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
815   TargetLoweringBase::AtomicExpansionKind
816   shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
817   TargetLoweringBase::AtomicExpansionKind
818   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
819 
820   TargetLoweringBase::AtomicExpansionKind
821   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
822 
823   bool useLoadStackGuardNode(const Module &M) const override;
824   TargetLoweringBase::LegalizeTypeAction
825   getPreferredVectorAction(MVT VT) const override;
826 
827   /// If the target has a standard location for the stack protector cookie,
828   /// returns the address of that location. Otherwise, returns nullptr.
829   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
830 
831   void insertSSPDeclarations(Module &M) const override;
832   Value *getSDagStackGuard(const Module &M) const override;
833   Function *getSSPStackGuardCheck(const Module &M) const override;
834 
835   /// If the target has a standard location for the unsafe stack pointer,
836   /// returns the address of that location. Otherwise, returns nullptr.
837   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
838 
839   /// If a physical register, this returns the register that receives the
840   /// exception address on entry to an EH pad.
841   Register
842   getExceptionPointerRegister(const Constant *PersonalityFn) const override;
843 
844   /// If a physical register, this returns the register that receives the
845   /// exception typeid on entry to a landing pad.
846   Register
847   getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
848 
849   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
850 
851   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
852                         const MachineFunction &MF) const override;
853 
854   bool isCheapToSpeculateCttz(Type *) const override {
855     return true;
856   }
857 
858   bool isCheapToSpeculateCtlz(Type *) const override {
859     return true;
860   }
861 
862   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
863 
864   bool hasAndNotCompare(SDValue V) const override {
865     // We can use bics for any scalar.
866     return V.getValueType().isScalarInteger();
867   }
868 
869   bool hasAndNot(SDValue Y) const override {
870     EVT VT = Y.getValueType();
871 
872     if (!VT.isVector())
873       return hasAndNotCompare(Y);
874 
875     TypeSize TS = VT.getSizeInBits();
876     // TODO: We should be able to use bic/bif too for SVE.
877     return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
878   }
879 
880   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
881       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
882       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
883       SelectionDAG &DAG) const override;
884 
885   ShiftLegalizationStrategy
886   preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
887                                      unsigned ExpansionFactor) const override;
888 
889   bool shouldTransformSignedTruncationCheck(EVT XVT,
890                                             unsigned KeptBits) const override {
891     // For vectors, we don't have a preference.
892     if (XVT.isVector())
893       return false;
894 
895     auto VTIsOk = [](EVT VT) -> bool {
896       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
897              VT == MVT::i64;
898     };
899 
900     // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
901     // supports. XVT will be larger than KeptBitsVT.
902     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
903     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
904   }
905 
906   bool preferIncOfAddToSubOfNot(EVT VT) const override;
907 
908   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
909 
910   bool shouldExpandCmpUsingSelects(EVT VT) const override;
911 
912   bool isComplexDeinterleavingSupported() const override;
913   bool isComplexDeinterleavingOperationSupported(
914       ComplexDeinterleavingOperation Operation, Type *Ty) const override;
915 
916   Value *createComplexDeinterleavingIR(
917       IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
918       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
919       Value *Accumulator = nullptr) const override;
920 
921   bool supportSplitCSR(MachineFunction *MF) const override {
922     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
923            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
924   }
925   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
926   void insertCopiesSplitCSR(
927       MachineBasicBlock *Entry,
928       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
929 
930   bool supportSwiftError() const override {
931     return true;
932   }
933 
934   bool supportPtrAuthBundles() const override { return true; }
935 
936   bool supportKCFIBundles() const override { return true; }
937 
938   MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
939                               MachineBasicBlock::instr_iterator &MBBI,
940                               const TargetInstrInfo *TII) const override;
941 
942   /// Enable aggressive FMA fusion on targets that want it.
943   bool enableAggressiveFMAFusion(EVT VT) const override;
944 
945   /// Returns the size of the platform's va_list object.
946   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
947 
948   /// Returns true if \p VecTy is a legal interleaved access type. This
949   /// function checks the vector element type and the overall width of the
950   /// vector.
951   bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
952                                     bool &UseScalable) const;
953 
954   /// Returns the number of interleaved accesses that will be generated when
955   /// lowering accesses of the given type.
956   unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
957                                      bool UseScalable) const;
958 
959   MachineMemOperand::Flags getTargetMMOFlags(
960     const Instruction &I) const override;
961 
962   bool functionArgumentNeedsConsecutiveRegisters(
963       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
964       const DataLayout &DL) const override;
965 
966   /// Used for exception handling on Win64.
967   bool needsFixedCatchObjects() const override;
968 
969   bool fallBackToDAGISel(const Instruction &Inst) const override;
970 
971   /// SVE code generation for fixed length vectors does not custom lower
972   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
973   /// merge. However, merging them creates a BUILD_VECTOR that is just as
974   /// illegal as the original, thus leading to an infinite legalisation loop.
975   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
976   /// vector types this override can be removed.
977   bool mergeStoresAfterLegalization(EVT VT) const override;
978 
979   // If the platform/function should have a redzone, return the size in bytes.
980   unsigned getRedZoneSize(const Function &F) const {
981     if (F.hasFnAttribute(Attribute::NoRedZone))
982       return 0;
983     return 128;
984   }
985 
986   bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
987   EVT getPromotedVTForPredicate(EVT VT) const;
988 
989   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
990                              bool AllowUnknown = false) const override;
991 
992   bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
993 
994   bool
995   shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const override;
996 
997   bool shouldExpandCttzElements(EVT VT) const override;
998 
999   bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override;
1000 
1001   /// If a change in streaming mode is required on entry to/return from a
1002   /// function call it emits and returns the corresponding SMSTART or SMSTOP
1003   /// node. \p Condition should be one of the enum values from
1004   /// AArch64SME::ToggleCondition.
1005   SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
1006                               SDValue Chain, SDValue InGlue, unsigned Condition,
1007                               SDValue PStateSM = SDValue()) const;
1008 
1009   bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
1010 
1011   // Normally SVE is only used for byte size vectors that do not fit within a
1012   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
1013   // used for 64-bit and 128-bit vectors as well.
1014   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
1015 
1016   // Follow NEON ABI rules even when using SVE for fixed length vectors.
1017   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1018                                     EVT VT) const override;
1019   unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1020                                          CallingConv::ID CC,
1021                                          EVT VT) const override;
1022   unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
1023                                                 CallingConv::ID CC, EVT VT,
1024                                                 EVT &IntermediateVT,
1025                                                 unsigned &NumIntermediates,
1026                                                 MVT &RegisterVT) const override;
1027 
1028   /// True if stack clash protection is enabled for this function.
1029   bool hasInlineStackProbe(const MachineFunction &MF) const override;
1030 
1031 #ifndef NDEBUG
1032   void verifyTargetSDNode(const SDNode *N) const override;
1033 #endif
1034 
1035 private:
1036   /// Keep a pointer to the AArch64Subtarget around so that we can
1037   /// make the right decision when generating code for different targets.
1038   const AArch64Subtarget *Subtarget;
1039 
1040   llvm::BumpPtrAllocator BumpAlloc;
1041   llvm::StringSaver Saver{BumpAlloc};
1042 
1043   bool isExtFreeImpl(const Instruction *Ext) const override;
1044 
1045   void addTypeForNEON(MVT VT);
1046   void addTypeForFixedLengthSVE(MVT VT);
1047   void addDRType(MVT VT);
1048   void addQRType(MVT VT);
1049 
1050   bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
1051 
1052   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
1053                                bool isVarArg,
1054                                const SmallVectorImpl<ISD::InputArg> &Ins,
1055                                const SDLoc &DL, SelectionDAG &DAG,
1056                                SmallVectorImpl<SDValue> &InVals) const override;
1057 
1058   void AdjustInstrPostInstrSelection(MachineInstr &MI,
1059                                      SDNode *Node) const override;
1060 
1061   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
1062                     SmallVectorImpl<SDValue> &InVals) const override;
1063 
1064   SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1065                           CallingConv::ID CallConv, bool isVarArg,
1066                           const SmallVectorImpl<CCValAssign> &RVLocs,
1067                           const SDLoc &DL, SelectionDAG &DAG,
1068                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1069                           SDValue ThisVal, bool RequiresSMChange) const;
1070 
1071   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
1072   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
1073   SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
1074   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
1075 
1076   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
1077   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
1078 
1079   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
1080 
1081   SDValue LowerVECTOR_COMPRESS(SDValue Op, SelectionDAG &DAG) const;
1082 
1083   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1084   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1085   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
1086 
1087   bool
1088   isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
1089 
1090   /// Finds the incoming stack arguments which overlap the given fixed stack
1091   /// object and incorporates their load into the current chain. This prevents
1092   /// an upcoming store from clobbering the stack argument before it's used.
1093   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
1094                               MachineFrameInfo &MFI, int ClobberedFI) const;
1095 
1096   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
1097 
1098   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
1099                            SDValue &Chain) const;
1100 
1101   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1102                       bool isVarArg,
1103                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1104                       LLVMContext &Context, const Type *RetTy) const override;
1105 
1106   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1107                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1108                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1109                       SelectionDAG &DAG) const override;
1110 
1111   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1112                         unsigned Flag) const;
1113   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
1114                         unsigned Flag) const;
1115   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
1116                         unsigned Flag) const;
1117   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1118                         unsigned Flag) const;
1119   SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
1120                         unsigned Flag) const;
1121   template <class NodeTy>
1122   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1123   template <class NodeTy>
1124   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1125   template <class NodeTy>
1126   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1127   template <class NodeTy>
1128   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1129   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1130   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1131   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1132   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1133   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1134   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
1135                                const SDLoc &DL, SelectionDAG &DAG) const;
1136   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
1137                                  SelectionDAG &DAG) const;
1138   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1139   SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1140   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1141   SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1142   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
1143   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1144   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
1145   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
1146                          SDValue TVal, SDValue FVal, const SDLoc &dl,
1147                          SelectionDAG &DAG) const;
1148   SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1149   SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1150   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1151   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
1152   SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
1153   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1154   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1155   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
1156   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
1157   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
1158   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1159   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
1160   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1161   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1162   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
1163   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1164   SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1165   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1166   SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1167   SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1168   SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1169   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1170   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1171   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1172   SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
1173   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
1174   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1175   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
1176   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
1177                               unsigned NewOp) const;
1178   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
1179   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
1180   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1181   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1182   SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1183   SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1184   SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
1185   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
1186   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1187   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
1188   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
1189   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
1190   SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
1191   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
1192   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
1193   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
1194   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
1195   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1196   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1197   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1198   SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1199   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1200   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1201   SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
1202   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1203   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1204   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
1205   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
1206   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
1207   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
1208   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1209   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
1210   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1211   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1212   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1213   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1214   SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1215   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1216 
1217   SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
1218 
1219   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
1220                                                SelectionDAG &DAG) const;
1221   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
1222                                                SelectionDAG &DAG) const;
1223   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1224   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1225   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1226   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1227   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1228                               SelectionDAG &DAG) const;
1229   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1230   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1231   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1232   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1233                                             SelectionDAG &DAG) const;
1234   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1235                                               SelectionDAG &DAG) const;
1236   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1237   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1238   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1239   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1240                                              SelectionDAG &DAG) const;
1241   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1242   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1243   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1244   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1245   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1246                                               SelectionDAG &DAG) const;
1247   SDValue LowerFixedLengthBuildVectorToSVE(SDValue Op, SelectionDAG &DAG) const;
1248 
1249   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1250                         SmallVectorImpl<SDNode *> &Created) const override;
1251   SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1252                         SmallVectorImpl<SDNode *> &Created) const override;
1253   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1254                           int &ExtraSteps, bool &UseOneConst,
1255                           bool Reciprocal) const override;
1256   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1257                            int &ExtraSteps) const override;
1258   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1259                            const DenormalMode &Mode) const override;
1260   SDValue getSqrtResultForDenormInput(SDValue Operand,
1261                                       SelectionDAG &DAG) const override;
1262   unsigned combineRepeatedFPDivisors() const override;
1263 
1264   ConstraintType getConstraintType(StringRef Constraint) const override;
1265   Register getRegisterByName(const char* RegName, LLT VT,
1266                              const MachineFunction &MF) const override;
1267 
1268   /// Examine constraint string and operand type and determine a weight value.
1269   /// The operand object must already have been set up with the operand type.
1270   ConstraintWeight
1271   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1272                                  const char *constraint) const override;
1273 
1274   std::pair<unsigned, const TargetRegisterClass *>
1275   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1276                                StringRef Constraint, MVT VT) const override;
1277 
1278   const char *LowerXConstraint(EVT ConstraintVT) const override;
1279 
1280   void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1281                                     std::vector<SDValue> &Ops,
1282                                     SelectionDAG &DAG) const override;
1283 
1284   InlineAsm::ConstraintCode
1285   getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1286     if (ConstraintCode == "Q")
1287       return InlineAsm::ConstraintCode::Q;
1288     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1289     //        followed by llvm_unreachable so we'll leave them unimplemented in
1290     //        the backend for now.
1291     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1292   }
1293 
1294   /// Handle Lowering flag assembly outputs.
1295   SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1296                                       const SDLoc &DL,
1297                                       const AsmOperandInfo &Constraint,
1298                                       SelectionDAG &DAG) const override;
1299 
1300   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1301   bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
1302   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1303   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1304   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1305   bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1306                               SDValue &Offset, SelectionDAG &DAG) const;
1307   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1308                                  ISD::MemIndexedMode &AM,
1309                                  SelectionDAG &DAG) const override;
1310   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1311                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1312                                   SelectionDAG &DAG) const override;
1313   bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
1314                        bool IsPre, MachineRegisterInfo &MRI) const override;
1315 
1316   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1317                           SelectionDAG &DAG) const override;
1318   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1319                              SelectionDAG &DAG) const;
1320   void ReplaceExtractSubVectorResults(SDNode *N,
1321                                       SmallVectorImpl<SDValue> &Results,
1322                                       SelectionDAG &DAG) const;
1323 
1324   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1325 
1326   void finalizeLowering(MachineFunction &MF) const override;
1327 
1328   bool shouldLocalize(const MachineInstr &MI,
1329                       const TargetTransformInfo *TTI) const override;
1330 
1331   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1332                                          const APInt &OriginalDemandedBits,
1333                                          const APInt &OriginalDemandedElts,
1334                                          KnownBits &Known,
1335                                          TargetLoweringOpt &TLO,
1336                                          unsigned Depth) const override;
1337 
1338   bool isTargetCanonicalConstantNode(SDValue Op) const override;
1339 
1340   // With the exception of data-predicate transitions, no instructions are
1341   // required to cast between legal scalable vector types. However:
1342   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1343   //     is not universally usable.
1344   //  2. Most unpacked integer types are not legal and thus integer extends
1345   //     cannot be used to convert between unpacked and packed types.
1346   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1347   // to transition between unpacked and packed types of the same element type,
1348   // with BITCAST used otherwise.
1349   // This function does not handle predicate bitcasts.
1350   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
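  // An illustrative example of the above (an assumption about typical usage,
  // not taken from the implementation): "bitcasting" the unpacked type
  // nxv2f32 to nxv8i16 could first REINTERPRET_CAST between nxv2f32 and the
  // packed nxv4f32 (same element type, different length) and then BITCAST
  // between the equally sized packed types nxv4f32 and nxv8i16.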
1351 
1352   // Returns the runtime value for PSTATE.SM by generating a call to
1353   // __arm_sme_state.
1354   SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
1355                              EVT VT) const;
1356 
1357   bool preferScalarizeSplat(SDNode *N) const override;
1358 
1359   unsigned getMinimumJumpTableEntries() const override;
1360 
1361   bool softPromoteHalfType() const override { return true; }
1362 
1363   bool shouldScalarizeBinop(SDValue VecOp) const override {
1364     return VecOp.getOpcode() == ISD::SETCC;
1365   }
1366 };
1367 
1368 namespace AArch64 {
1369 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1370                          const TargetLibraryInfo *libInfo);
1371 } // end namespace AArch64
1372 
1373 } // end namespace llvm
1374 
1375 #endif
1376