//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value; please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
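// For example (illustrative only, not part of the original header; the value
// names are made up), an SVE floating-point add whose inactive lanes are
// undefined might appear in the DAG as:
//
//    t2: nxv4f32 = AArch64ISD::FADD_PRED t_pg, t_a, t_b
//
// whereas a merging negate takes its inactive lanes from the trailing
// passthru operand:
//
//    t3: nxv4f32 = AArch64ISD::FNEG_MERGE_PASSTHRU t_pg, t_a, t_passthru
//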
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
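                // A sketch of that sequence (illustrative only; the exact
                // relocation operators depend on code model and object
                // format):
                //   movz x0, #:abs_g3:sym
                //   movk x0, #:abs_g2_nc:sym
                //   movk x0, #:abs_g1_nc:sym
                //   movk x0, #:abs_g0_nc:sym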
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMAX_PRED,
  FMIN_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector arithmetic negation
  NEG,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Absolute difference
  UABD,
  SABD,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,
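  // For example (illustrative only, not part of the original header),
  //   (v4i32 (AArch64ISD::NVCAST (v2i64 V)))
  // reinterprets the register contents in place, whereas an equivalent
  // ISD::BITCAST lowered on a big-endian target may require REV instructions
  // to preserve memory-layout semantics.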

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads (first-faulting).
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads (first-faulting).
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext;
}
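
// For example (illustrative only, not part of the original header), isel can
// use this property to fold away a zero-extension: a pattern like
//
//    (i64 (zext (i32 (add x, y))))
//
// can become a 32-bit ADD whose result is placed in the 64-bit register via
// SUBREG_TO_REG, relying on the implicit zeroing of the upper 32 bits instead
// of an explicit extend.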

} // end anonymous namespace

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
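
// For example (illustrative only, not part of the original header), the
// current rounding mode can be decoded from a raw FPCR value with:
//
//   unsigned Mode = (FPCR >> AArch64::RoundingBitsPos) & AArch64::rmMask;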
} // namespace AArch64

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64 bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }
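
  // For example (illustrative only, not part of the original header), an IR
  // overflow check such as
  //   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  // maps naturally onto ADDS followed by a read of the V flag (e.g. CSET).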

  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                              Value *Addr, AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilder<> &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are OK with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
    const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
                                                 CallingConv::ID CallConv,
                                                 bool isVarArg) const override;
  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SDValue N) const;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  // Normally SVE is only used for vectors that do not fit within a NEON
  // vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
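  // For example (illustrative only, not part of the original header), with
  // clang's -msve-vector-bits=512 a fixed-length v16i32 (512 bits) cannot fit
  // in a 128-bit NEON register and is lowered using SVE, while v4i32 keeps
  // using NEON unless OverrideNEON is true.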

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
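  // For example (illustrative only, not part of the original header), casting
  // an unpacked nxv2f32 to a packed nxv4f32 uses REINTERPRET_CAST (same
  // element type, different packing), while nxv4i32 <-> nxv2i64 (same packed
  // bit length, different element types) goes through BITCAST.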
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif