xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1e8d8bef9SDimitry Andric //=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric ///
9e8d8bef9SDimitry Andric /// \file
10e8d8bef9SDimitry Andric /// Post-legalization lowering for instructions.
11e8d8bef9SDimitry Andric ///
12e8d8bef9SDimitry Andric /// This is used to offload pattern matching from the selector.
13e8d8bef9SDimitry Andric ///
14e8d8bef9SDimitry Andric /// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15e8d8bef9SDimitry Andric /// a G_ZIP, G_UZP, etc.
16e8d8bef9SDimitry Andric ///
17e8d8bef9SDimitry Andric /// General optimization combines should be handled by either the
18e8d8bef9SDimitry Andric /// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19e8d8bef9SDimitry Andric ///
20e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
21e8d8bef9SDimitry Andric 
22*0fca6ea1SDimitry Andric #include "AArch64ExpandImm.h"
23e8d8bef9SDimitry Andric #include "AArch64GlobalISelUtils.h"
24*0fca6ea1SDimitry Andric #include "AArch64PerfectShuffle.h"
25fe6060f1SDimitry Andric #include "AArch64Subtarget.h"
26fe6060f1SDimitry Andric #include "AArch64TargetMachine.h"
27fe6060f1SDimitry Andric #include "GISel/AArch64LegalizerInfo.h"
28e8d8bef9SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h"
29fe6060f1SDimitry Andric #include "TargetInfo/AArch64TargetInfo.h"
30fe6060f1SDimitry Andric #include "Utils/AArch64BaseInfo.h"
31e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
33e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
3406c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
35bdd1243dSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
36bdd1243dSDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
37bdd1243dSDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
38e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
39e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
40e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
41*0fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
42e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
43e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
44e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
45e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
46e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
47fe6060f1SDimitry Andric #include "llvm/IR/InstrTypes.h"
48e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
49e8d8bef9SDimitry Andric #include "llvm/Support/Debug.h"
50fe6060f1SDimitry Andric #include "llvm/Support/ErrorHandling.h"
51bdd1243dSDimitry Andric #include <optional>
52e8d8bef9SDimitry Andric 
5306c3fb27SDimitry Andric #define GET_GICOMBINER_DEPS
5406c3fb27SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
5506c3fb27SDimitry Andric #undef GET_GICOMBINER_DEPS
5606c3fb27SDimitry Andric 
57e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-postlegalizer-lowering"
58e8d8bef9SDimitry Andric 
59e8d8bef9SDimitry Andric using namespace llvm;
60e8d8bef9SDimitry Andric using namespace MIPatternMatch;
61e8d8bef9SDimitry Andric using namespace AArch64GISelUtils;
62e8d8bef9SDimitry Andric 
6306c3fb27SDimitry Andric namespace {
6406c3fb27SDimitry Andric 
6506c3fb27SDimitry Andric #define GET_GICOMBINER_TYPES
6606c3fb27SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
6706c3fb27SDimitry Andric #undef GET_GICOMBINER_TYPES
6806c3fb27SDimitry Andric 
69e8d8bef9SDimitry Andric /// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
70e8d8bef9SDimitry Andric ///
71e8d8bef9SDimitry Andric /// Used for matching target-supported shuffles before codegen.
72e8d8bef9SDimitry Andric struct ShuffleVectorPseudo {
73e8d8bef9SDimitry Andric   unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
74e8d8bef9SDimitry Andric   Register Dst;                 ///< Destination register.
75e8d8bef9SDimitry Andric   SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
76e8d8bef9SDimitry Andric   ShuffleVectorPseudo(unsigned Opc, Register Dst,
77e8d8bef9SDimitry Andric                       std::initializer_list<SrcOp> SrcOps)
78e8d8bef9SDimitry Andric       : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
7981ad6265SDimitry Andric   ShuffleVectorPseudo() = default;
80e8d8bef9SDimitry Andric };
81e8d8bef9SDimitry Andric 
/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
///
/// \returns a (ReverseExt, Imm) pair on success: \c Imm is the EXT start
/// index, and \c ReverseExt is true when the two shuffle inputs must be
/// swapped before building the G_EXT.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return std::nullopt;

  // Use APInt to handle overflow when calculating expected element.
  // MaskBits is log2(2 * NumElts), so increments past the combined-input
  // range wrap to 0 instead of overflowing.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);

  // The following shuffle indices must be the successive elements after the
  // first real element. Undef (< 0) entries match anything; the lambda
  // advances ExpectedElt on every call, including for undef entries.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element.  E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two difference cases requiring to reverse input vectors.
  // For example, for vector <4 x i32> we have the following cases,
  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
  // to reverse two input vectors.
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}
123e8d8bef9SDimitry Andric 
124fe6060f1SDimitry Andric /// Helper function for matchINS.
125fe6060f1SDimitry Andric ///
126fe6060f1SDimitry Andric /// \returns a value when \p M is an ins mask for \p NumInputElements.
127fe6060f1SDimitry Andric ///
128fe6060f1SDimitry Andric /// First element of the returned pair is true when the produced
129fe6060f1SDimitry Andric /// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
130fe6060f1SDimitry Andric ///
131fe6060f1SDimitry Andric /// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
13206c3fb27SDimitry Andric std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
133fe6060f1SDimitry Andric                                               int NumInputElements) {
134fe6060f1SDimitry Andric   if (M.size() != static_cast<size_t>(NumInputElements))
135bdd1243dSDimitry Andric     return std::nullopt;
136fe6060f1SDimitry Andric   int NumLHSMatch = 0, NumRHSMatch = 0;
137fe6060f1SDimitry Andric   int LastLHSMismatch = -1, LastRHSMismatch = -1;
138fe6060f1SDimitry Andric   for (int Idx = 0; Idx < NumInputElements; ++Idx) {
139fe6060f1SDimitry Andric     if (M[Idx] == -1) {
140fe6060f1SDimitry Andric       ++NumLHSMatch;
141fe6060f1SDimitry Andric       ++NumRHSMatch;
142fe6060f1SDimitry Andric       continue;
143fe6060f1SDimitry Andric     }
144fe6060f1SDimitry Andric     M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
145fe6060f1SDimitry Andric     M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
146fe6060f1SDimitry Andric   }
147fe6060f1SDimitry Andric   const int NumNeededToMatch = NumInputElements - 1;
148fe6060f1SDimitry Andric   if (NumLHSMatch == NumNeededToMatch)
149fe6060f1SDimitry Andric     return std::make_pair(true, LastLHSMismatch);
150fe6060f1SDimitry Andric   if (NumRHSMatch == NumNeededToMatch)
151fe6060f1SDimitry Andric     return std::make_pair(false, LastRHSMismatch);
152bdd1243dSDimitry Andric   return std::nullopt;
153fe6060f1SDimitry Andric }
154fe6060f1SDimitry Andric 
155e8d8bef9SDimitry Andric /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
156e8d8bef9SDimitry Andric /// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
15706c3fb27SDimitry Andric bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
158e8d8bef9SDimitry Andric               ShuffleVectorPseudo &MatchInfo) {
159e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
160e8d8bef9SDimitry Andric   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
161e8d8bef9SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
162e8d8bef9SDimitry Andric   Register Src = MI.getOperand(1).getReg();
163e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(Dst);
164e8d8bef9SDimitry Andric   unsigned EltSize = Ty.getScalarSizeInBits();
165e8d8bef9SDimitry Andric 
166e8d8bef9SDimitry Andric   // Element size for a rev cannot be 64.
167e8d8bef9SDimitry Andric   if (EltSize == 64)
168e8d8bef9SDimitry Andric     return false;
169e8d8bef9SDimitry Andric 
170e8d8bef9SDimitry Andric   unsigned NumElts = Ty.getNumElements();
171e8d8bef9SDimitry Andric 
172*0fca6ea1SDimitry Andric   // Try to produce a G_REV instruction
173*0fca6ea1SDimitry Andric   for (unsigned LaneSize : {64U, 32U, 16U}) {
174*0fca6ea1SDimitry Andric     if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
175*0fca6ea1SDimitry Andric       unsigned Opcode;
176*0fca6ea1SDimitry Andric       if (LaneSize == 64U)
177*0fca6ea1SDimitry Andric         Opcode = AArch64::G_REV64;
178*0fca6ea1SDimitry Andric       else if (LaneSize == 32U)
179*0fca6ea1SDimitry Andric         Opcode = AArch64::G_REV32;
180*0fca6ea1SDimitry Andric       else
181*0fca6ea1SDimitry Andric         Opcode = AArch64::G_REV16;
182*0fca6ea1SDimitry Andric 
183*0fca6ea1SDimitry Andric       MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
184e8d8bef9SDimitry Andric       return true;
185e8d8bef9SDimitry Andric     }
186*0fca6ea1SDimitry Andric   }
187e8d8bef9SDimitry Andric 
188e8d8bef9SDimitry Andric   return false;
189e8d8bef9SDimitry Andric }
190e8d8bef9SDimitry Andric 
191e8d8bef9SDimitry Andric /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
192e8d8bef9SDimitry Andric /// a G_TRN1 or G_TRN2 instruction.
19306c3fb27SDimitry Andric bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
194e8d8bef9SDimitry Andric               ShuffleVectorPseudo &MatchInfo) {
195e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
196e8d8bef9SDimitry Andric   unsigned WhichResult;
197e8d8bef9SDimitry Andric   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
198e8d8bef9SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
199e8d8bef9SDimitry Andric   unsigned NumElts = MRI.getType(Dst).getNumElements();
200e8d8bef9SDimitry Andric   if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
201e8d8bef9SDimitry Andric     return false;
202e8d8bef9SDimitry Andric   unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
203e8d8bef9SDimitry Andric   Register V1 = MI.getOperand(1).getReg();
204e8d8bef9SDimitry Andric   Register V2 = MI.getOperand(2).getReg();
205e8d8bef9SDimitry Andric   MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
206e8d8bef9SDimitry Andric   return true;
207e8d8bef9SDimitry Andric }
208e8d8bef9SDimitry Andric 
209e8d8bef9SDimitry Andric /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
210e8d8bef9SDimitry Andric /// a G_UZP1 or G_UZP2 instruction.
211e8d8bef9SDimitry Andric ///
212e8d8bef9SDimitry Andric /// \param [in] MI - The shuffle vector instruction.
213e8d8bef9SDimitry Andric /// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
21406c3fb27SDimitry Andric bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
215e8d8bef9SDimitry Andric               ShuffleVectorPseudo &MatchInfo) {
216e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
217e8d8bef9SDimitry Andric   unsigned WhichResult;
218e8d8bef9SDimitry Andric   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
219e8d8bef9SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
220e8d8bef9SDimitry Andric   unsigned NumElts = MRI.getType(Dst).getNumElements();
221e8d8bef9SDimitry Andric   if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
222e8d8bef9SDimitry Andric     return false;
223e8d8bef9SDimitry Andric   unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
224e8d8bef9SDimitry Andric   Register V1 = MI.getOperand(1).getReg();
225e8d8bef9SDimitry Andric   Register V2 = MI.getOperand(2).getReg();
226e8d8bef9SDimitry Andric   MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
227e8d8bef9SDimitry Andric   return true;
228e8d8bef9SDimitry Andric }
229e8d8bef9SDimitry Andric 
23006c3fb27SDimitry Andric bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
231e8d8bef9SDimitry Andric               ShuffleVectorPseudo &MatchInfo) {
232e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
233e8d8bef9SDimitry Andric   unsigned WhichResult;
234e8d8bef9SDimitry Andric   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
235e8d8bef9SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
236e8d8bef9SDimitry Andric   unsigned NumElts = MRI.getType(Dst).getNumElements();
237*0fca6ea1SDimitry Andric   if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
238e8d8bef9SDimitry Andric     return false;
239e8d8bef9SDimitry Andric   unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
240e8d8bef9SDimitry Andric   Register V1 = MI.getOperand(1).getReg();
241e8d8bef9SDimitry Andric   Register V2 = MI.getOperand(2).getReg();
242e8d8bef9SDimitry Andric   MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
243e8d8bef9SDimitry Andric   return true;
244e8d8bef9SDimitry Andric }
245e8d8bef9SDimitry Andric 
/// Helper function for matchDup: recognize a lane-0 splat whose source is a
/// G_INSERT_VECTOR_ELT into an undef vector, and rewrite it as a G_DUP of the
/// inserted scalar.
bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                 MachineRegisterInfo &MRI,
                                 ShuffleVectorPseudo &MatchInfo) {
  // This pattern only produces a splat of lane 0.
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  // %scalar:gpr(s64) = COPY $x0
  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  // %cst0:gpr(s32) = G_CONSTANT i32 0
  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  // %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = G_DUP %scalar

  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  // The G_DUP source is the scalar that was inserted (operand 2 of the
  // G_INSERT_VECTOR_ELT).
  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}
285e8d8bef9SDimitry Andric 
286e8d8bef9SDimitry Andric /// Helper function for matchDup.
28706c3fb27SDimitry Andric bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
288e8d8bef9SDimitry Andric                              MachineRegisterInfo &MRI,
289e8d8bef9SDimitry Andric                              ShuffleVectorPseudo &MatchInfo) {
290e8d8bef9SDimitry Andric   assert(Lane >= 0 && "Expected positive lane?");
291e8d8bef9SDimitry Andric   // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
292e8d8bef9SDimitry Andric   // lane's definition directly.
293e8d8bef9SDimitry Andric   auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
294e8d8bef9SDimitry Andric                                   MI.getOperand(1).getReg(), MRI);
295e8d8bef9SDimitry Andric   if (!BuildVecMI)
296e8d8bef9SDimitry Andric     return false;
297e8d8bef9SDimitry Andric   Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
298e8d8bef9SDimitry Andric   MatchInfo =
299e8d8bef9SDimitry Andric       ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
300e8d8bef9SDimitry Andric   return true;
301e8d8bef9SDimitry Andric }
302e8d8bef9SDimitry Andric 
30306c3fb27SDimitry Andric bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
304e8d8bef9SDimitry Andric               ShuffleVectorPseudo &MatchInfo) {
305e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
306e8d8bef9SDimitry Andric   auto MaybeLane = getSplatIndex(MI);
307e8d8bef9SDimitry Andric   if (!MaybeLane)
308e8d8bef9SDimitry Andric     return false;
309e8d8bef9SDimitry Andric   int Lane = *MaybeLane;
310e8d8bef9SDimitry Andric   // If this is undef splat, generate it via "just" vdup, if possible.
311e8d8bef9SDimitry Andric   if (Lane < 0)
312e8d8bef9SDimitry Andric     Lane = 0;
313e8d8bef9SDimitry Andric   if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
314e8d8bef9SDimitry Andric     return true;
315e8d8bef9SDimitry Andric   if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
316e8d8bef9SDimitry Andric     return true;
317e8d8bef9SDimitry Andric   return false;
318e8d8bef9SDimitry Andric }
319e8d8bef9SDimitry Andric 
320bdd1243dSDimitry Andric // Check if an EXT instruction can handle the shuffle mask when the vector
321bdd1243dSDimitry Andric // sources of the shuffle are the same.
32206c3fb27SDimitry Andric bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
323bdd1243dSDimitry Andric   unsigned NumElts = Ty.getNumElements();
324bdd1243dSDimitry Andric 
325bdd1243dSDimitry Andric   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
326bdd1243dSDimitry Andric   if (M[0] < 0)
327bdd1243dSDimitry Andric     return false;
328bdd1243dSDimitry Andric 
329bdd1243dSDimitry Andric   // If this is a VEXT shuffle, the immediate value is the index of the first
330bdd1243dSDimitry Andric   // element.  The other shuffle indices must be the successive elements after
331bdd1243dSDimitry Andric   // the first one.
332bdd1243dSDimitry Andric   unsigned ExpectedElt = M[0];
333bdd1243dSDimitry Andric   for (unsigned I = 1; I < NumElts; ++I) {
334bdd1243dSDimitry Andric     // Increment the expected index.  If it wraps around, just follow it
335bdd1243dSDimitry Andric     // back to index zero and keep going.
336bdd1243dSDimitry Andric     ++ExpectedElt;
337bdd1243dSDimitry Andric     if (ExpectedElt == NumElts)
338bdd1243dSDimitry Andric       ExpectedElt = 0;
339bdd1243dSDimitry Andric 
340bdd1243dSDimitry Andric     if (M[I] < 0)
341bdd1243dSDimitry Andric       continue; // Ignore UNDEF indices.
342bdd1243dSDimitry Andric     if (ExpectedElt != static_cast<unsigned>(M[I]))
343bdd1243dSDimitry Andric       return false;
344bdd1243dSDimitry Andric   }
345bdd1243dSDimitry Andric 
346bdd1243dSDimitry Andric   return true;
347bdd1243dSDimitry Andric }
348bdd1243dSDimitry Andric 
/// \return true if a G_SHUFFLE_VECTOR \p MI can be replaced with a G_EXT.
/// Handles both the two-input case and the single-input case, where the
/// second shuffle operand is an implicit def.
bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  // G_EXT's immediate is a byte offset, so lane indices are scaled by the
  // element size in bytes.
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (!ExtInfo) {
    // No two-source EXT mask. Try the single-source (same vector twice)
    // form, which requires the second operand to be undef.
    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
        !isSingletonExtMask(Mask, DstTy))
      return false;

    Imm = Mask[0] * ExtFactor;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
    return true;
  }
  bool ReverseExt;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  // getExtMask() tells us when the two inputs must be swapped.
  if (ReverseExt)
    std::swap(V1, V2);
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}
378e8d8bef9SDimitry Andric 
379e8d8bef9SDimitry Andric /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
380e8d8bef9SDimitry Andric /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
38106c3fb27SDimitry Andric void applyShuffleVectorPseudo(MachineInstr &MI,
382e8d8bef9SDimitry Andric                               ShuffleVectorPseudo &MatchInfo) {
383e8d8bef9SDimitry Andric   MachineIRBuilder MIRBuilder(MI);
384e8d8bef9SDimitry Andric   MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
385e8d8bef9SDimitry Andric   MI.eraseFromParent();
386e8d8bef9SDimitry Andric }
387e8d8bef9SDimitry Andric 
388e8d8bef9SDimitry Andric /// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
389e8d8bef9SDimitry Andric /// Special-cased because the constant operand must be emitted as a G_CONSTANT
390e8d8bef9SDimitry Andric /// for the imported tablegen patterns to work.
39106c3fb27SDimitry Andric void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
392e8d8bef9SDimitry Andric   MachineIRBuilder MIRBuilder(MI);
3935f757f3fSDimitry Andric   if (MatchInfo.SrcOps[2].getImm() == 0)
3945f757f3fSDimitry Andric     MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
3955f757f3fSDimitry Andric   else {
396e8d8bef9SDimitry Andric     // Tablegen patterns expect an i32 G_CONSTANT as the final op.
397e8d8bef9SDimitry Andric     auto Cst =
398e8d8bef9SDimitry Andric         MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
399e8d8bef9SDimitry Andric     MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
400e8d8bef9SDimitry Andric                           {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
4015f757f3fSDimitry Andric   }
402e8d8bef9SDimitry Andric   MI.eraseFromParent();
403e8d8bef9SDimitry Andric }
404e8d8bef9SDimitry Andric 
405*0fca6ea1SDimitry Andric bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
406*0fca6ea1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
407*0fca6ea1SDimitry Andric 
408*0fca6ea1SDimitry Andric   auto ValAndVReg =
409*0fca6ea1SDimitry Andric       getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
410*0fca6ea1SDimitry Andric   return !ValAndVReg;
411*0fca6ea1SDimitry Andric }
412*0fca6ea1SDimitry Andric 
/// Lower a G_INSERT_VECTOR_ELT with a non-constant index by spilling the
/// vector to a stack slot, storing the element through a computed (and
/// range-masked) pointer, and reloading the whole vector.
void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());

  // Create a stack slot and store the vector into it
  MachineFunction &MF = Builder.getMF();
  // Align to the vector size, capped at 16 bytes.
  Align Alignment(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                     Alignment, false);
  LLT FramePtrTy = LLT::pointer(0, 64);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);

  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  // Masking with NumElements-1 clamps the index into range; this is only
  // correct because the element count is a power of two (asserted above).
  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
  Register EltPtr =
      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
          .getReg(0);

  // Write the inserted element
  // NOTE(review): element store uses Align(1) since the element's offset is
  // only as aligned as its own size; the vector load/store use Align(8).
  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
  // Reload the whole vector.
  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
  Insert.eraseFromParent();
}
453*0fca6ea1SDimitry Andric 
454fe6060f1SDimitry Andric /// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
455fe6060f1SDimitry Andric /// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
456fe6060f1SDimitry Andric ///
457fe6060f1SDimitry Andric /// e.g.
458fe6060f1SDimitry Andric ///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
459fe6060f1SDimitry Andric ///
460fe6060f1SDimitry Andric /// Can be represented as
461fe6060f1SDimitry Andric ///
462fe6060f1SDimitry Andric ///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
463fe6060f1SDimitry Andric ///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
464fe6060f1SDimitry Andric ///
46506c3fb27SDimitry Andric bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
466fe6060f1SDimitry Andric               std::tuple<Register, int, Register, int> &MatchInfo) {
467fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
468fe6060f1SDimitry Andric   ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
469fe6060f1SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
470fe6060f1SDimitry Andric   int NumElts = MRI.getType(Dst).getNumElements();
471fe6060f1SDimitry Andric   auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
472fe6060f1SDimitry Andric   if (!DstIsLeftAndDstLane)
473fe6060f1SDimitry Andric     return false;
474fe6060f1SDimitry Andric   bool DstIsLeft;
475fe6060f1SDimitry Andric   int DstLane;
476fe6060f1SDimitry Andric   std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
477fe6060f1SDimitry Andric   Register Left = MI.getOperand(1).getReg();
478fe6060f1SDimitry Andric   Register Right = MI.getOperand(2).getReg();
479fe6060f1SDimitry Andric   Register DstVec = DstIsLeft ? Left : Right;
480fe6060f1SDimitry Andric   Register SrcVec = Left;
481fe6060f1SDimitry Andric 
482fe6060f1SDimitry Andric   int SrcLane = ShuffleMask[DstLane];
483fe6060f1SDimitry Andric   if (SrcLane >= NumElts) {
484fe6060f1SDimitry Andric     SrcVec = Right;
485fe6060f1SDimitry Andric     SrcLane -= NumElts;
486fe6060f1SDimitry Andric   }
487fe6060f1SDimitry Andric 
488fe6060f1SDimitry Andric   MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
489fe6060f1SDimitry Andric   return true;
490fe6060f1SDimitry Andric }
491fe6060f1SDimitry Andric 
49206c3fb27SDimitry Andric void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
493fe6060f1SDimitry Andric               MachineIRBuilder &Builder,
494fe6060f1SDimitry Andric               std::tuple<Register, int, Register, int> &MatchInfo) {
495fe6060f1SDimitry Andric   Builder.setInstrAndDebugLoc(MI);
496fe6060f1SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
497fe6060f1SDimitry Andric   auto ScalarTy = MRI.getType(Dst).getElementType();
498fe6060f1SDimitry Andric   Register DstVec, SrcVec;
499fe6060f1SDimitry Andric   int DstLane, SrcLane;
500fe6060f1SDimitry Andric   std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
501fe6060f1SDimitry Andric   auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
502fe6060f1SDimitry Andric   auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
503fe6060f1SDimitry Andric   auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
504fe6060f1SDimitry Andric   Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
505fe6060f1SDimitry Andric   MI.eraseFromParent();
506fe6060f1SDimitry Andric }
507fe6060f1SDimitry Andric 
508e8d8bef9SDimitry Andric /// isVShiftRImm - Check if this is a valid vector for the immediate
509e8d8bef9SDimitry Andric /// operand of a vector shift right operation. The value must be in the range:
510e8d8bef9SDimitry Andric ///   1 <= Value <= ElementBits for a right shift.
51106c3fb27SDimitry Andric bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
512e8d8bef9SDimitry Andric                   int64_t &Cnt) {
513e8d8bef9SDimitry Andric   assert(Ty.isVector() && "vector shift count is not a vector type");
514e8d8bef9SDimitry Andric   MachineInstr *MI = MRI.getVRegDef(Reg);
515fe6060f1SDimitry Andric   auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
516e8d8bef9SDimitry Andric   if (!Cst)
517e8d8bef9SDimitry Andric     return false;
518e8d8bef9SDimitry Andric   Cnt = *Cst;
519e8d8bef9SDimitry Andric   int64_t ElementBits = Ty.getScalarSizeInBits();
520e8d8bef9SDimitry Andric   return Cnt >= 1 && Cnt <= ElementBits;
521e8d8bef9SDimitry Andric }
522e8d8bef9SDimitry Andric 
523e8d8bef9SDimitry Andric /// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
52406c3fb27SDimitry Andric bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
525e8d8bef9SDimitry Andric                        int64_t &Imm) {
526e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
527e8d8bef9SDimitry Andric          MI.getOpcode() == TargetOpcode::G_LSHR);
528e8d8bef9SDimitry Andric   LLT Ty = MRI.getType(MI.getOperand(1).getReg());
529e8d8bef9SDimitry Andric   if (!Ty.isVector())
530e8d8bef9SDimitry Andric     return false;
531e8d8bef9SDimitry Andric   return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
532e8d8bef9SDimitry Andric }
533e8d8bef9SDimitry Andric 
53406c3fb27SDimitry Andric void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
535e8d8bef9SDimitry Andric                        int64_t &Imm) {
536e8d8bef9SDimitry Andric   unsigned Opc = MI.getOpcode();
537e8d8bef9SDimitry Andric   assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
538e8d8bef9SDimitry Andric   unsigned NewOpc =
539e8d8bef9SDimitry Andric       Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
540e8d8bef9SDimitry Andric   MachineIRBuilder MIB(MI);
541e8d8bef9SDimitry Andric   auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
542e8d8bef9SDimitry Andric   MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
543e8d8bef9SDimitry Andric   MI.eraseFromParent();
544e8d8bef9SDimitry Andric }
545e8d8bef9SDimitry Andric 
546e8d8bef9SDimitry Andric /// Determine if it is possible to modify the \p RHS and predicate \p P of a
547e8d8bef9SDimitry Andric /// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
548e8d8bef9SDimitry Andric ///
549e8d8bef9SDimitry Andric /// \returns A pair containing the updated immediate and predicate which may
550e8d8bef9SDimitry Andric /// be used to optimize the instruction.
551e8d8bef9SDimitry Andric ///
552e8d8bef9SDimitry Andric /// \note This assumes that the comparison has been legalized.
553bdd1243dSDimitry Andric std::optional<std::pair<uint64_t, CmpInst::Predicate>>
554e8d8bef9SDimitry Andric tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
555e8d8bef9SDimitry Andric                         const MachineRegisterInfo &MRI) {
556e8d8bef9SDimitry Andric   const auto &Ty = MRI.getType(RHS);
557e8d8bef9SDimitry Andric   if (Ty.isVector())
558bdd1243dSDimitry Andric     return std::nullopt;
559e8d8bef9SDimitry Andric   unsigned Size = Ty.getSizeInBits();
560e8d8bef9SDimitry Andric   assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
561e8d8bef9SDimitry Andric 
562e8d8bef9SDimitry Andric   // If the RHS is not a constant, or the RHS is already a valid arithmetic
563e8d8bef9SDimitry Andric   // immediate, then there is nothing to change.
564349cc55cSDimitry Andric   auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
565e8d8bef9SDimitry Andric   if (!ValAndVReg)
566bdd1243dSDimitry Andric     return std::nullopt;
567*0fca6ea1SDimitry Andric   uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
568*0fca6ea1SDimitry Andric   uint64_t C = OriginalC;
569e8d8bef9SDimitry Andric   if (isLegalArithImmed(C))
570bdd1243dSDimitry Andric     return std::nullopt;
571e8d8bef9SDimitry Andric 
572e8d8bef9SDimitry Andric   // We have a non-arithmetic immediate. Check if adjusting the immediate and
573e8d8bef9SDimitry Andric   // adjusting the predicate will result in a legal arithmetic immediate.
574e8d8bef9SDimitry Andric   switch (P) {
575e8d8bef9SDimitry Andric   default:
576bdd1243dSDimitry Andric     return std::nullopt;
577e8d8bef9SDimitry Andric   case CmpInst::ICMP_SLT:
578e8d8bef9SDimitry Andric   case CmpInst::ICMP_SGE:
579e8d8bef9SDimitry Andric     // Check for
580e8d8bef9SDimitry Andric     //
581e8d8bef9SDimitry Andric     // x slt c => x sle c - 1
582e8d8bef9SDimitry Andric     // x sge c => x sgt c - 1
583e8d8bef9SDimitry Andric     //
584e8d8bef9SDimitry Andric     // When c is not the smallest possible negative number.
585e8d8bef9SDimitry Andric     if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
586e8d8bef9SDimitry Andric         (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
587bdd1243dSDimitry Andric       return std::nullopt;
588e8d8bef9SDimitry Andric     P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
589e8d8bef9SDimitry Andric     C -= 1;
590e8d8bef9SDimitry Andric     break;
591e8d8bef9SDimitry Andric   case CmpInst::ICMP_ULT:
592e8d8bef9SDimitry Andric   case CmpInst::ICMP_UGE:
593e8d8bef9SDimitry Andric     // Check for
594e8d8bef9SDimitry Andric     //
595e8d8bef9SDimitry Andric     // x ult c => x ule c - 1
596e8d8bef9SDimitry Andric     // x uge c => x ugt c - 1
597e8d8bef9SDimitry Andric     //
598e8d8bef9SDimitry Andric     // When c is not zero.
599e8d8bef9SDimitry Andric     if (C == 0)
600bdd1243dSDimitry Andric       return std::nullopt;
601e8d8bef9SDimitry Andric     P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
602e8d8bef9SDimitry Andric     C -= 1;
603e8d8bef9SDimitry Andric     break;
604e8d8bef9SDimitry Andric   case CmpInst::ICMP_SLE:
605e8d8bef9SDimitry Andric   case CmpInst::ICMP_SGT:
606e8d8bef9SDimitry Andric     // Check for
607e8d8bef9SDimitry Andric     //
608e8d8bef9SDimitry Andric     // x sle c => x slt c + 1
609e8d8bef9SDimitry Andric     // x sgt c => s sge c + 1
610e8d8bef9SDimitry Andric     //
611e8d8bef9SDimitry Andric     // When c is not the largest possible signed integer.
612e8d8bef9SDimitry Andric     if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
613e8d8bef9SDimitry Andric         (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
614bdd1243dSDimitry Andric       return std::nullopt;
615e8d8bef9SDimitry Andric     P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
616e8d8bef9SDimitry Andric     C += 1;
617e8d8bef9SDimitry Andric     break;
618e8d8bef9SDimitry Andric   case CmpInst::ICMP_ULE:
619e8d8bef9SDimitry Andric   case CmpInst::ICMP_UGT:
620e8d8bef9SDimitry Andric     // Check for
621e8d8bef9SDimitry Andric     //
622e8d8bef9SDimitry Andric     // x ule c => x ult c + 1
623e8d8bef9SDimitry Andric     // x ugt c => s uge c + 1
624e8d8bef9SDimitry Andric     //
625e8d8bef9SDimitry Andric     // When c is not the largest possible unsigned integer.
626e8d8bef9SDimitry Andric     if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
627e8d8bef9SDimitry Andric         (Size == 64 && C == UINT64_MAX))
628bdd1243dSDimitry Andric       return std::nullopt;
629e8d8bef9SDimitry Andric     P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
630e8d8bef9SDimitry Andric     C += 1;
631e8d8bef9SDimitry Andric     break;
632e8d8bef9SDimitry Andric   }
633e8d8bef9SDimitry Andric 
634e8d8bef9SDimitry Andric   // Check if the new constant is valid, and return the updated constant and
635e8d8bef9SDimitry Andric   // predicate if it is.
636e8d8bef9SDimitry Andric   if (Size == 32)
637e8d8bef9SDimitry Andric     C = static_cast<uint32_t>(C);
638*0fca6ea1SDimitry Andric   if (isLegalArithImmed(C))
639e8d8bef9SDimitry Andric     return {{C, P}};
640*0fca6ea1SDimitry Andric 
641*0fca6ea1SDimitry Andric   auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) {
642*0fca6ea1SDimitry Andric     SmallVector<AArch64_IMM::ImmInsnModel> Insn;
643*0fca6ea1SDimitry Andric     AArch64_IMM::expandMOVImm(Imm, 32, Insn);
644*0fca6ea1SDimitry Andric     return Insn.size() == 1;
645*0fca6ea1SDimitry Andric   };
646*0fca6ea1SDimitry Andric 
647*0fca6ea1SDimitry Andric   if (!IsMaterializableInSingleInstruction(OriginalC) &&
648*0fca6ea1SDimitry Andric       IsMaterializableInSingleInstruction(C))
649*0fca6ea1SDimitry Andric     return {{C, P}};
650*0fca6ea1SDimitry Andric 
651*0fca6ea1SDimitry Andric   return std::nullopt;
652e8d8bef9SDimitry Andric }
653e8d8bef9SDimitry Andric 
654e8d8bef9SDimitry Andric /// Determine whether or not it is possible to update the RHS and predicate of
655e8d8bef9SDimitry Andric /// a G_ICMP instruction such that the RHS will be selected as an arithmetic
656e8d8bef9SDimitry Andric /// immediate.
657e8d8bef9SDimitry Andric ///
658e8d8bef9SDimitry Andric /// \p MI - The G_ICMP instruction
659e8d8bef9SDimitry Andric /// \p MatchInfo - The new RHS immediate and predicate on success
660e8d8bef9SDimitry Andric ///
661e8d8bef9SDimitry Andric /// See tryAdjustICmpImmAndPred for valid transformations.
662e8d8bef9SDimitry Andric bool matchAdjustICmpImmAndPred(
663e8d8bef9SDimitry Andric     MachineInstr &MI, const MachineRegisterInfo &MRI,
664e8d8bef9SDimitry Andric     std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
665e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_ICMP);
666e8d8bef9SDimitry Andric   Register RHS = MI.getOperand(3).getReg();
667e8d8bef9SDimitry Andric   auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
668e8d8bef9SDimitry Andric   if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
669e8d8bef9SDimitry Andric     MatchInfo = *MaybeNewImmAndPred;
670e8d8bef9SDimitry Andric     return true;
671e8d8bef9SDimitry Andric   }
672e8d8bef9SDimitry Andric   return false;
673e8d8bef9SDimitry Andric }
674e8d8bef9SDimitry Andric 
67506c3fb27SDimitry Andric void applyAdjustICmpImmAndPred(
676e8d8bef9SDimitry Andric     MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
677e8d8bef9SDimitry Andric     MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
678e8d8bef9SDimitry Andric   MIB.setInstrAndDebugLoc(MI);
679e8d8bef9SDimitry Andric   MachineOperand &RHS = MI.getOperand(3);
680e8d8bef9SDimitry Andric   MachineRegisterInfo &MRI = *MIB.getMRI();
681e8d8bef9SDimitry Andric   auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
682e8d8bef9SDimitry Andric                                MatchInfo.first);
683e8d8bef9SDimitry Andric   Observer.changingInstr(MI);
684e8d8bef9SDimitry Andric   RHS.setReg(Cst->getOperand(0).getReg());
685e8d8bef9SDimitry Andric   MI.getOperand(1).setPredicate(MatchInfo.second);
686e8d8bef9SDimitry Andric   Observer.changedInstr(MI);
687e8d8bef9SDimitry Andric }
688e8d8bef9SDimitry Andric 
689e8d8bef9SDimitry Andric bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
690e8d8bef9SDimitry Andric                   std::pair<unsigned, int> &MatchInfo) {
691e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
692e8d8bef9SDimitry Andric   Register Src1Reg = MI.getOperand(1).getReg();
693e8d8bef9SDimitry Andric   const LLT SrcTy = MRI.getType(Src1Reg);
694e8d8bef9SDimitry Andric   const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
695e8d8bef9SDimitry Andric 
696e8d8bef9SDimitry Andric   auto LaneIdx = getSplatIndex(MI);
697e8d8bef9SDimitry Andric   if (!LaneIdx)
698e8d8bef9SDimitry Andric     return false;
699e8d8bef9SDimitry Andric 
700e8d8bef9SDimitry Andric   // The lane idx should be within the first source vector.
701e8d8bef9SDimitry Andric   if (*LaneIdx >= SrcTy.getNumElements())
702e8d8bef9SDimitry Andric     return false;
703e8d8bef9SDimitry Andric 
704e8d8bef9SDimitry Andric   if (DstTy != SrcTy)
705e8d8bef9SDimitry Andric     return false;
706e8d8bef9SDimitry Andric 
707e8d8bef9SDimitry Andric   LLT ScalarTy = SrcTy.getElementType();
708e8d8bef9SDimitry Andric   unsigned ScalarSize = ScalarTy.getSizeInBits();
709e8d8bef9SDimitry Andric 
710e8d8bef9SDimitry Andric   unsigned Opc = 0;
711e8d8bef9SDimitry Andric   switch (SrcTy.getNumElements()) {
712e8d8bef9SDimitry Andric   case 2:
713e8d8bef9SDimitry Andric     if (ScalarSize == 64)
714e8d8bef9SDimitry Andric       Opc = AArch64::G_DUPLANE64;
715fe6060f1SDimitry Andric     else if (ScalarSize == 32)
716fe6060f1SDimitry Andric       Opc = AArch64::G_DUPLANE32;
717e8d8bef9SDimitry Andric     break;
718e8d8bef9SDimitry Andric   case 4:
719e8d8bef9SDimitry Andric     if (ScalarSize == 32)
720e8d8bef9SDimitry Andric       Opc = AArch64::G_DUPLANE32;
7215f757f3fSDimitry Andric     else if (ScalarSize == 16)
7225f757f3fSDimitry Andric       Opc = AArch64::G_DUPLANE16;
723e8d8bef9SDimitry Andric     break;
724e8d8bef9SDimitry Andric   case 8:
7255f757f3fSDimitry Andric     if (ScalarSize == 8)
7265f757f3fSDimitry Andric       Opc = AArch64::G_DUPLANE8;
7275f757f3fSDimitry Andric     else if (ScalarSize == 16)
728e8d8bef9SDimitry Andric       Opc = AArch64::G_DUPLANE16;
729e8d8bef9SDimitry Andric     break;
730e8d8bef9SDimitry Andric   case 16:
731e8d8bef9SDimitry Andric     if (ScalarSize == 8)
732e8d8bef9SDimitry Andric       Opc = AArch64::G_DUPLANE8;
733e8d8bef9SDimitry Andric     break;
734e8d8bef9SDimitry Andric   default:
735e8d8bef9SDimitry Andric     break;
736e8d8bef9SDimitry Andric   }
737e8d8bef9SDimitry Andric   if (!Opc)
738e8d8bef9SDimitry Andric     return false;
739e8d8bef9SDimitry Andric 
740e8d8bef9SDimitry Andric   MatchInfo.first = Opc;
741e8d8bef9SDimitry Andric   MatchInfo.second = *LaneIdx;
742e8d8bef9SDimitry Andric   return true;
743e8d8bef9SDimitry Andric }
744e8d8bef9SDimitry Andric 
74506c3fb27SDimitry Andric void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
746e8d8bef9SDimitry Andric                   MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
747e8d8bef9SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
748fe6060f1SDimitry Andric   Register Src1Reg = MI.getOperand(1).getReg();
749fe6060f1SDimitry Andric   const LLT SrcTy = MRI.getType(Src1Reg);
750fe6060f1SDimitry Andric 
751e8d8bef9SDimitry Andric   B.setInstrAndDebugLoc(MI);
752e8d8bef9SDimitry Andric   auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
753fe6060f1SDimitry Andric 
754fe6060f1SDimitry Andric   Register DupSrc = MI.getOperand(1).getReg();
755fe6060f1SDimitry Andric   // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
756fe6060f1SDimitry Andric   // To do this, we can use a G_CONCAT_VECTORS to do the widening.
7575f757f3fSDimitry Andric   if (SrcTy.getSizeInBits() == 64) {
758fe6060f1SDimitry Andric     auto Undef = B.buildUndef(SrcTy);
7595f757f3fSDimitry Andric     DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
760fe6060f1SDimitry Andric                                   {Src1Reg, Undef.getReg(0)})
761fe6060f1SDimitry Andric                  .getReg(0);
762fe6060f1SDimitry Andric   }
763fe6060f1SDimitry Andric   B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
764e8d8bef9SDimitry Andric   MI.eraseFromParent();
765e8d8bef9SDimitry Andric }
766e8d8bef9SDimitry Andric 
767cb14a3feSDimitry Andric bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
768cb14a3feSDimitry Andric   auto &Unmerge = cast<GUnmerge>(MI);
769cb14a3feSDimitry Andric   Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
770cb14a3feSDimitry Andric   const LLT SrcTy = MRI.getType(Src1Reg);
771*0fca6ea1SDimitry Andric   if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
772*0fca6ea1SDimitry Andric     return false;
773cb14a3feSDimitry Andric   return SrcTy.isVector() && !SrcTy.isScalable() &&
774cb14a3feSDimitry Andric          Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
775cb14a3feSDimitry Andric }
776cb14a3feSDimitry Andric 
777cb14a3feSDimitry Andric void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
778cb14a3feSDimitry Andric                                  MachineIRBuilder &B) {
779cb14a3feSDimitry Andric   auto &Unmerge = cast<GUnmerge>(MI);
780cb14a3feSDimitry Andric   Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
781cb14a3feSDimitry Andric   const LLT SrcTy = MRI.getType(Src1Reg);
782cb14a3feSDimitry Andric   assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
783cb14a3feSDimitry Andric          "Expected a fixed length vector");
784cb14a3feSDimitry Andric 
785cb14a3feSDimitry Andric   for (int I = 0; I < SrcTy.getNumElements(); ++I)
786cb14a3feSDimitry Andric     B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
787cb14a3feSDimitry Andric   MI.eraseFromParent();
788cb14a3feSDimitry Andric }
789cb14a3feSDimitry Andric 
79006c3fb27SDimitry Andric bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
791fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
792fe6060f1SDimitry Andric   auto Splat = getAArch64VectorSplat(MI, MRI);
793fe6060f1SDimitry Andric   if (!Splat)
794fe6060f1SDimitry Andric     return false;
795fe6060f1SDimitry Andric   if (Splat->isReg())
796fe6060f1SDimitry Andric     return true;
797fe6060f1SDimitry Andric   // Later, during selection, we'll try to match imported patterns using
798fe6060f1SDimitry Andric   // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
799fe6060f1SDimitry Andric   // G_BUILD_VECTORs which could match those patterns.
800fe6060f1SDimitry Andric   int64_t Cst = Splat->getCst();
801fe6060f1SDimitry Andric   return (Cst != 0 && Cst != -1);
802fe6060f1SDimitry Andric }
803fe6060f1SDimitry Andric 
80406c3fb27SDimitry Andric void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
805fe6060f1SDimitry Andric                            MachineIRBuilder &B) {
806fe6060f1SDimitry Andric   B.setInstrAndDebugLoc(MI);
807fe6060f1SDimitry Andric   B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
808fe6060f1SDimitry Andric                {MI.getOperand(1).getReg()});
809fe6060f1SDimitry Andric   MI.eraseFromParent();
810fe6060f1SDimitry Andric }
811fe6060f1SDimitry Andric 
812fe6060f1SDimitry Andric /// \returns how many instructions would be saved by folding a G_ICMP's shift
813fe6060f1SDimitry Andric /// and/or extension operations.
81406c3fb27SDimitry Andric unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
815fe6060f1SDimitry Andric   // No instructions to save if there's more than one use or no uses.
816fe6060f1SDimitry Andric   if (!MRI.hasOneNonDBGUse(CmpOp))
817fe6060f1SDimitry Andric     return 0;
818fe6060f1SDimitry Andric 
819fe6060f1SDimitry Andric   // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
820fe6060f1SDimitry Andric   auto IsSupportedExtend = [&](const MachineInstr &MI) {
821fe6060f1SDimitry Andric     if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
822fe6060f1SDimitry Andric       return true;
823fe6060f1SDimitry Andric     if (MI.getOpcode() != TargetOpcode::G_AND)
824fe6060f1SDimitry Andric       return false;
825fe6060f1SDimitry Andric     auto ValAndVReg =
826349cc55cSDimitry Andric         getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
827fe6060f1SDimitry Andric     if (!ValAndVReg)
828fe6060f1SDimitry Andric       return false;
829fe6060f1SDimitry Andric     uint64_t Mask = ValAndVReg->Value.getZExtValue();
830fe6060f1SDimitry Andric     return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
831fe6060f1SDimitry Andric   };
832fe6060f1SDimitry Andric 
833fe6060f1SDimitry Andric   MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
834fe6060f1SDimitry Andric   if (IsSupportedExtend(*Def))
835fe6060f1SDimitry Andric     return 1;
836fe6060f1SDimitry Andric 
837fe6060f1SDimitry Andric   unsigned Opc = Def->getOpcode();
838fe6060f1SDimitry Andric   if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
839fe6060f1SDimitry Andric       Opc != TargetOpcode::G_LSHR)
840fe6060f1SDimitry Andric     return 0;
841fe6060f1SDimitry Andric 
842fe6060f1SDimitry Andric   auto MaybeShiftAmt =
843349cc55cSDimitry Andric       getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
844fe6060f1SDimitry Andric   if (!MaybeShiftAmt)
845fe6060f1SDimitry Andric     return 0;
846fe6060f1SDimitry Andric   uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
847fe6060f1SDimitry Andric   MachineInstr *ShiftLHS =
848fe6060f1SDimitry Andric       getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
849fe6060f1SDimitry Andric 
850fe6060f1SDimitry Andric   // Check if we can fold an extend and a shift.
851fe6060f1SDimitry Andric   // FIXME: This is duplicated with the selector. (See:
852fe6060f1SDimitry Andric   // selectArithExtendedRegister)
853fe6060f1SDimitry Andric   if (IsSupportedExtend(*ShiftLHS))
854fe6060f1SDimitry Andric     return (ShiftAmt <= 4) ? 2 : 1;
855fe6060f1SDimitry Andric 
856fe6060f1SDimitry Andric   LLT Ty = MRI.getType(Def->getOperand(0).getReg());
857fe6060f1SDimitry Andric   if (Ty.isVector())
858fe6060f1SDimitry Andric     return 0;
859fe6060f1SDimitry Andric   unsigned ShiftSize = Ty.getSizeInBits();
860fe6060f1SDimitry Andric   if ((ShiftSize == 32 && ShiftAmt <= 31) ||
861fe6060f1SDimitry Andric       (ShiftSize == 64 && ShiftAmt <= 63))
862fe6060f1SDimitry Andric     return 1;
863fe6060f1SDimitry Andric   return 0;
864fe6060f1SDimitry Andric }
865fe6060f1SDimitry Andric 
866fe6060f1SDimitry Andric /// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
867fe6060f1SDimitry Andric /// instruction \p MI.
86806c3fb27SDimitry Andric bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
869fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_ICMP);
870fe6060f1SDimitry Andric   // Swap the operands if it would introduce a profitable folding opportunity.
871fe6060f1SDimitry Andric   // (e.g. a shift + extend).
872fe6060f1SDimitry Andric   //
873fe6060f1SDimitry Andric   //  For example:
874fe6060f1SDimitry Andric   //    lsl     w13, w11, #1
875fe6060f1SDimitry Andric   //    cmp     w13, w12
876fe6060f1SDimitry Andric   // can be turned into:
877fe6060f1SDimitry Andric   //    cmp     w12, w11, lsl #1
878fe6060f1SDimitry Andric 
879fe6060f1SDimitry Andric   // Don't swap if there's a constant on the RHS, because we know we can fold
880fe6060f1SDimitry Andric   // that.
881fe6060f1SDimitry Andric   Register RHS = MI.getOperand(3).getReg();
882349cc55cSDimitry Andric   auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
883fe6060f1SDimitry Andric   if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
884fe6060f1SDimitry Andric     return false;
885fe6060f1SDimitry Andric 
886fe6060f1SDimitry Andric   Register LHS = MI.getOperand(2).getReg();
887fe6060f1SDimitry Andric   auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
888fe6060f1SDimitry Andric   auto GetRegForProfit = [&](Register Reg) {
889fe6060f1SDimitry Andric     MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
890fe6060f1SDimitry Andric     return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
891fe6060f1SDimitry Andric   };
892fe6060f1SDimitry Andric 
893fe6060f1SDimitry Andric   // Don't have a constant on the RHS. If we swap the LHS and RHS of the
894fe6060f1SDimitry Andric   // compare, would we be able to fold more instructions?
895fe6060f1SDimitry Andric   Register TheLHS = GetRegForProfit(LHS);
896fe6060f1SDimitry Andric   Register TheRHS = GetRegForProfit(RHS);
897fe6060f1SDimitry Andric 
898fe6060f1SDimitry Andric   // If the LHS is more likely to give us a folding opportunity, then swap the
899fe6060f1SDimitry Andric   // LHS and RHS.
900fe6060f1SDimitry Andric   return (getCmpOperandFoldingProfit(TheLHS, MRI) >
901fe6060f1SDimitry Andric           getCmpOperandFoldingProfit(TheRHS, MRI));
902fe6060f1SDimitry Andric }
903fe6060f1SDimitry Andric 
90406c3fb27SDimitry Andric void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
905fe6060f1SDimitry Andric   auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
906fe6060f1SDimitry Andric   Register LHS = MI.getOperand(2).getReg();
907fe6060f1SDimitry Andric   Register RHS = MI.getOperand(3).getReg();
908fe6060f1SDimitry Andric   Observer.changedInstr(MI);
909fe6060f1SDimitry Andric   MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
910fe6060f1SDimitry Andric   MI.getOperand(2).setReg(RHS);
911fe6060f1SDimitry Andric   MI.getOperand(3).setReg(LHS);
912fe6060f1SDimitry Andric   Observer.changedInstr(MI);
913fe6060f1SDimitry Andric }
914fe6060f1SDimitry Andric 
915fe6060f1SDimitry Andric /// \returns a function which builds a vector floating point compare instruction
916fe6060f1SDimitry Andric /// for a condition code \p CC.
917fe6060f1SDimitry Andric /// \param [in] IsZero - True if the comparison is against 0.
918fe6060f1SDimitry Andric /// \param [in] NoNans - True if the target has NoNansFPMath.
91906c3fb27SDimitry Andric std::function<Register(MachineIRBuilder &)>
920fe6060f1SDimitry Andric getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
921fe6060f1SDimitry Andric               bool NoNans, MachineRegisterInfo &MRI) {
922fe6060f1SDimitry Andric   LLT DstTy = MRI.getType(LHS);
923fe6060f1SDimitry Andric   assert(DstTy.isVector() && "Expected vector types only?");
924fe6060f1SDimitry Andric   assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
925fe6060f1SDimitry Andric   switch (CC) {
926fe6060f1SDimitry Andric   default:
927fe6060f1SDimitry Andric     llvm_unreachable("Unexpected condition code!");
928fe6060f1SDimitry Andric   case AArch64CC::NE:
929fe6060f1SDimitry Andric     return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
930fe6060f1SDimitry Andric       auto FCmp = IsZero
931fe6060f1SDimitry Andric                       ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
932fe6060f1SDimitry Andric                       : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
933fe6060f1SDimitry Andric       return MIB.buildNot(DstTy, FCmp).getReg(0);
934fe6060f1SDimitry Andric     };
935fe6060f1SDimitry Andric   case AArch64CC::EQ:
936fe6060f1SDimitry Andric     return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
937fe6060f1SDimitry Andric       return IsZero
938fe6060f1SDimitry Andric                  ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
939fe6060f1SDimitry Andric                  : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
940fe6060f1SDimitry Andric                        .getReg(0);
941fe6060f1SDimitry Andric     };
942fe6060f1SDimitry Andric   case AArch64CC::GE:
943fe6060f1SDimitry Andric     return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
944fe6060f1SDimitry Andric       return IsZero
945fe6060f1SDimitry Andric                  ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
946fe6060f1SDimitry Andric                  : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
947fe6060f1SDimitry Andric                        .getReg(0);
948fe6060f1SDimitry Andric     };
949fe6060f1SDimitry Andric   case AArch64CC::GT:
950fe6060f1SDimitry Andric     return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
951fe6060f1SDimitry Andric       return IsZero
952fe6060f1SDimitry Andric                  ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
953fe6060f1SDimitry Andric                  : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
954fe6060f1SDimitry Andric                        .getReg(0);
955fe6060f1SDimitry Andric     };
956fe6060f1SDimitry Andric   case AArch64CC::LS:
957fe6060f1SDimitry Andric     return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
958fe6060f1SDimitry Andric       return IsZero
959fe6060f1SDimitry Andric                  ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
960fe6060f1SDimitry Andric                  : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
961fe6060f1SDimitry Andric                        .getReg(0);
962fe6060f1SDimitry Andric     };
963fe6060f1SDimitry Andric   case AArch64CC::MI:
964fe6060f1SDimitry Andric     return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
965fe6060f1SDimitry Andric       return IsZero
966fe6060f1SDimitry Andric                  ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
967fe6060f1SDimitry Andric                  : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
968fe6060f1SDimitry Andric                        .getReg(0);
969fe6060f1SDimitry Andric     };
970fe6060f1SDimitry Andric   }
971fe6060f1SDimitry Andric }
972fe6060f1SDimitry Andric 
973fe6060f1SDimitry Andric /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
97406c3fb27SDimitry Andric bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
975fe6060f1SDimitry Andric                           MachineIRBuilder &MIB) {
976fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_FCMP);
977fe6060f1SDimitry Andric   const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
97806c3fb27SDimitry Andric 
979fe6060f1SDimitry Andric   Register Dst = MI.getOperand(0).getReg();
980fe6060f1SDimitry Andric   LLT DstTy = MRI.getType(Dst);
981fe6060f1SDimitry Andric   if (!DstTy.isVector() || !ST.hasNEON())
982fe6060f1SDimitry Andric     return false;
983fe6060f1SDimitry Andric   Register LHS = MI.getOperand(2).getReg();
984fe6060f1SDimitry Andric   unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
98506c3fb27SDimitry Andric   if (EltSize == 16 && !ST.hasFullFP16())
986fe6060f1SDimitry Andric     return false;
98706c3fb27SDimitry Andric   if (EltSize != 16 && EltSize != 32 && EltSize != 64)
98806c3fb27SDimitry Andric     return false;
98906c3fb27SDimitry Andric 
99006c3fb27SDimitry Andric   return true;
99106c3fb27SDimitry Andric }
99206c3fb27SDimitry Andric 
/// Lower a vector G_FCMP \p MI into AArch64-specific compare pseudos,
/// replacing all uses of the original result and erasing \p MI.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  const auto &CmpMI = cast<GFCmp>(MI);

  Register Dst = CmpMI.getReg(0);
  CmpInst::Predicate Pred = CmpMI.getCond();
  Register LHS = CmpMI.getLHSReg();
  Register RHS = CmpMI.getRHSReg();

  LLT DstTy = MRI.getType(Dst);

  // Detect a splat-of-constant-zero RHS.
  auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);

  // Compares against 0 have special target-specific pseudos.
  bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  if ((Pred == CmpInst::Predicate::FCMP_ORD ||
       Pred == CmpInst::Predicate::FCMP_UNO) &&
      IsZero) {
    // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
    // NaN, so equivalent to a == a and doesn't need the two comparisons an
    // "ord" normally would.
    // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
    // thus equivalent to a != a.
    RHS = LHS;
    IsZero = false;
    CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
  } else
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

  // Instead of having an apply function, just build here to simplify things.
  MIB.setInstrAndDebugLoc(MI);

  const bool NoNans =
      ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

  // Some predicates map to two AArch64 condition codes; in that case CC2 is
  // not AL and the two compare results are ORed together.
  auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
  Register CmpRes;
  if (CC2 == AArch64CC::AL)
    CmpRes = Cmp(MIB);
  else {
    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
    auto Cmp2Dst = Cmp2(MIB);
    auto Cmp1Dst = Cmp(MIB);
    CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  }
  // Predicates with no direct encoding are lowered as the inverse compare
  // followed by a NOT.
  if (Invert)
    CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  MRI.replaceRegWith(Dst, CmpRes);
  MI.eraseFromParent();
}
1050fe6060f1SDimitry Andric 
105106c3fb27SDimitry Andric bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1052fe6060f1SDimitry Andric                          Register &SrcReg) {
1053fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_STORE);
1054fe6060f1SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1055fe6060f1SDimitry Andric   if (MRI.getType(DstReg).isVector())
1056fe6060f1SDimitry Andric     return false;
1057fe6060f1SDimitry Andric   // Match a store of a truncate.
1058fe6060f1SDimitry Andric   if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1059fe6060f1SDimitry Andric     return false;
1060fe6060f1SDimitry Andric   // Only form truncstores for value types of max 64b.
1061fe6060f1SDimitry Andric   return MRI.getType(SrcReg).getSizeInBits() <= 64;
1062fe6060f1SDimitry Andric }
1063fe6060f1SDimitry Andric 
106406c3fb27SDimitry Andric void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
106506c3fb27SDimitry Andric                          MachineIRBuilder &B, GISelChangeObserver &Observer,
1066fe6060f1SDimitry Andric                          Register &SrcReg) {
1067fe6060f1SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_STORE);
1068fe6060f1SDimitry Andric   Observer.changingInstr(MI);
1069fe6060f1SDimitry Andric   MI.getOperand(0).setReg(SrcReg);
1070fe6060f1SDimitry Andric   Observer.changedInstr(MI);
1071fe6060f1SDimitry Andric }
1072fe6060f1SDimitry Andric 
1073bdd1243dSDimitry Andric // Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1074bdd1243dSDimitry Andric // form in the first place for combine opportunities, so any remaining ones
1075bdd1243dSDimitry Andric // at this stage need be lowered back.
107606c3fb27SDimitry Andric bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1077bdd1243dSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1078bdd1243dSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1079bdd1243dSDimitry Andric   LLT DstTy = MRI.getType(DstReg);
1080bdd1243dSDimitry Andric   return DstTy.isVector();
1081bdd1243dSDimitry Andric }
1082bdd1243dSDimitry Andric 
108306c3fb27SDimitry Andric void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
108406c3fb27SDimitry Andric                           MachineIRBuilder &B, GISelChangeObserver &Observer) {
1085bdd1243dSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1086bdd1243dSDimitry Andric   B.setInstrAndDebugLoc(MI);
1087bdd1243dSDimitry Andric   LegalizerHelper Helper(*MI.getMF(), Observer, B);
1088bdd1243dSDimitry Andric   Helper.lower(MI, 0, /* Unused hint type */ LLT());
1089bdd1243dSDimitry Andric }
1090bdd1243dSDimitry Andric 
10915f757f3fSDimitry Andric /// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
10925f757f3fSDimitry Andric ///           => unused, <N x t> = unmerge v
10935f757f3fSDimitry Andric bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
10945f757f3fSDimitry Andric                               Register &MatchInfo) {
10955f757f3fSDimitry Andric   auto &Unmerge = cast<GUnmerge>(MI);
10965f757f3fSDimitry Andric   if (Unmerge.getNumDefs() != 2)
10975f757f3fSDimitry Andric     return false;
10985f757f3fSDimitry Andric   if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
10995f757f3fSDimitry Andric     return false;
11005f757f3fSDimitry Andric 
11015f757f3fSDimitry Andric   LLT DstTy = MRI.getType(Unmerge.getReg(0));
11025f757f3fSDimitry Andric   if (!DstTy.isVector())
11035f757f3fSDimitry Andric     return false;
11045f757f3fSDimitry Andric 
11055f757f3fSDimitry Andric   MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
11065f757f3fSDimitry Andric   if (!Ext)
11075f757f3fSDimitry Andric     return false;
11085f757f3fSDimitry Andric 
11095f757f3fSDimitry Andric   Register ExtSrc1 = Ext->getOperand(1).getReg();
11105f757f3fSDimitry Andric   Register ExtSrc2 = Ext->getOperand(2).getReg();
11115f757f3fSDimitry Andric   auto LowestVal =
11125f757f3fSDimitry Andric       getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
11135f757f3fSDimitry Andric   if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
11145f757f3fSDimitry Andric     return false;
11155f757f3fSDimitry Andric 
11165f757f3fSDimitry Andric   if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
11175f757f3fSDimitry Andric     return false;
11185f757f3fSDimitry Andric 
11195f757f3fSDimitry Andric   MatchInfo = ExtSrc1;
11205f757f3fSDimitry Andric   return true;
11215f757f3fSDimitry Andric }
11225f757f3fSDimitry Andric 
11235f757f3fSDimitry Andric void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
11245f757f3fSDimitry Andric                               MachineIRBuilder &B,
11255f757f3fSDimitry Andric                               GISelChangeObserver &Observer, Register &SrcReg) {
11265f757f3fSDimitry Andric   Observer.changingInstr(MI);
11275f757f3fSDimitry Andric   // Swap dst registers.
11285f757f3fSDimitry Andric   Register Dst1 = MI.getOperand(0).getReg();
11295f757f3fSDimitry Andric   MI.getOperand(0).setReg(MI.getOperand(1).getReg());
11305f757f3fSDimitry Andric   MI.getOperand(1).setReg(Dst1);
11315f757f3fSDimitry Andric   MI.getOperand(2).setReg(SrcReg);
11325f757f3fSDimitry Andric   Observer.changedInstr(MI);
11335f757f3fSDimitry Andric }
11345f757f3fSDimitry Andric 
11355f757f3fSDimitry Andric // Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
11365f757f3fSDimitry Andric // Match v2s64 mul instructions, which will then be scalarised later on
11375f757f3fSDimitry Andric // Doing these two matches in one function to ensure that the order of matching
11385f757f3fSDimitry Andric // will always be the same.
11395f757f3fSDimitry Andric // Try lowering MUL to MULL before trying to scalarize if needed.
11405f757f3fSDimitry Andric bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
11415f757f3fSDimitry Andric   // Get the instructions that defined the source operand
11425f757f3fSDimitry Andric   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
11435f757f3fSDimitry Andric   MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
11445f757f3fSDimitry Andric   MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
11455f757f3fSDimitry Andric 
11465f757f3fSDimitry Andric   if (DstTy.isVector()) {
11475f757f3fSDimitry Andric     // If the source operands were EXTENDED before, then {U/S}MULL can be used
11485f757f3fSDimitry Andric     unsigned I1Opc = I1->getOpcode();
11495f757f3fSDimitry Andric     unsigned I2Opc = I2->getOpcode();
11505f757f3fSDimitry Andric     if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
11515f757f3fSDimitry Andric          (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
11525f757f3fSDimitry Andric         (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
11535f757f3fSDimitry Andric          MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
11545f757f3fSDimitry Andric         (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
11555f757f3fSDimitry Andric          MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
11565f757f3fSDimitry Andric       return true;
11575f757f3fSDimitry Andric     }
11585f757f3fSDimitry Andric     // If result type is v2s64, scalarise the instruction
11595f757f3fSDimitry Andric     else if (DstTy == LLT::fixed_vector(2, 64)) {
11605f757f3fSDimitry Andric       return true;
11615f757f3fSDimitry Andric     }
11625f757f3fSDimitry Andric   }
11635f757f3fSDimitry Andric   return false;
11645f757f3fSDimitry Andric }
11655f757f3fSDimitry Andric 
/// Apply for matchExtMulToMULL: either replace the G_MUL of two matching
/// extends with a G_UMULL/G_SMULL of the narrow sources, or scalarise a
/// v2s64 multiply. The condition below must mirror matchExtMulToMULL so the
/// same alternative is taken here as was matched there.
void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  // Get the instructions that defined the source operand
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

  // If the source operands were EXTENDED before, then {U/S}MULL can be used.
  // Both operands must use the same kind of extend, and each extend must
  // exactly double the scalar width of its source.
  unsigned I1Opc = I1->getOpcode();
  unsigned I2Opc = I2->getOpcode();
  if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
       (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
      (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
       MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
      (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
       MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {

    // Build the widening multiply on the extends' narrow sources and drop
    // the original G_MUL (its extends become dead if otherwise unused).
    B.setInstrAndDebugLoc(MI);
    B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
                                                         : AArch64::G_SMULL,
                 {MI.getOperand(0).getReg()},
                 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
    MI.eraseFromParent();
  }
  // If result type is v2s64, scalarise the instruction
  else if (DstTy == LLT::fixed_vector(2, 64)) {
    LegalizerHelper Helper(*MI.getMF(), Observer, B);
    B.setInstrAndDebugLoc(MI);
    // Halve the element count (v2s64 -> s64 pieces) via fewerElementsVector.
    Helper.fewerElementsVector(
        MI, 0,
        DstTy.changeElementCount(
            DstTy.getElementCount().divideCoefficientBy(2)));
  }
}
12035f757f3fSDimitry Andric 
12045f757f3fSDimitry Andric class AArch64PostLegalizerLoweringImpl : public Combiner {
120506c3fb27SDimitry Andric protected:
12065f757f3fSDimitry Andric   // TODO: Make CombinerHelper methods const.
12075f757f3fSDimitry Andric   mutable CombinerHelper Helper;
120806c3fb27SDimitry Andric   const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
120906c3fb27SDimitry Andric   const AArch64Subtarget &STI;
121006c3fb27SDimitry Andric 
121106c3fb27SDimitry Andric public:
121206c3fb27SDimitry Andric   AArch64PostLegalizerLoweringImpl(
12135f757f3fSDimitry Andric       MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
12145f757f3fSDimitry Andric       GISelCSEInfo *CSEInfo,
121506c3fb27SDimitry Andric       const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
12165f757f3fSDimitry Andric       const AArch64Subtarget &STI);
121706c3fb27SDimitry Andric 
121806c3fb27SDimitry Andric   static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
121906c3fb27SDimitry Andric 
12205f757f3fSDimitry Andric   bool tryCombineAll(MachineInstr &I) const override;
122106c3fb27SDimitry Andric 
122206c3fb27SDimitry Andric private:
122306c3fb27SDimitry Andric #define GET_GICOMBINER_CLASS_MEMBERS
1224e8d8bef9SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
122506c3fb27SDimitry Andric #undef GET_GICOMBINER_CLASS_MEMBERS
122606c3fb27SDimitry Andric };
122706c3fb27SDimitry Andric 
122806c3fb27SDimitry Andric #define GET_GICOMBINER_IMPL
122906c3fb27SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
123006c3fb27SDimitry Andric #undef GET_GICOMBINER_IMPL
123106c3fb27SDimitry Andric 
// Constructor: initializes the base Combiner (no known-bits analysis) and the
// tablegen-generated rule members pulled in via GET_GICOMBINER_CONSTRUCTOR_INITS.
// NOTE(review): Helper is built with IsPreLegalize=true even though this pass
// runs after legalization — presumably intentional since lowering may create
// target pseudos outside the legalizer ruleset; confirm against upstream.
AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI)
    : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
1245e8d8bef9SDimitry Andric 
/// MachineFunctionPass wrapper that runs the post-legalization lowering
/// combiner over a function.
class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  // Parsed once in the constructor from the rule-filter command-line option.
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
1262e8d8bef9SDimitry Andric } // end anonymous namespace
1263e8d8bef9SDimitry Andric 
// Declare required analyses. The combiner only rewrites instructions, so the
// CFG is preserved, and the SelectionDAG fallback analyses are requested for
// the case where GlobalISel bails out.
void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
1270e8d8bef9SDimitry Andric 
// Register the pass and parse the rule-enable/disable command-line option up
// front so a bad identifier fails fast instead of at first run.
AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}
1278e8d8bef9SDimitry Andric 
// Run the lowering combiner over \p MF. Returns true if anything changed.
bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  // Skip functions where instruction selection already failed.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  // Lowering may deliberately create target pseudos the legalizer doesn't
  // know, so illegal ops are allowed and no LegalizerInfo is supplied.
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
                     F.hasOptSize(), F.hasMinSize());
  AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
                                        RuleConfig, ST);
  return Impl.combineMachineInstrs();
}
1297e8d8bef9SDimitry Andric 
char AArch64PostLegalizerLowering::ID = 0;
// Register the pass with the PassRegistry, declaring its dependency on
// TargetPassConfig.
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)
1306e8d8bef9SDimitry Andric 
namespace llvm {
/// Factory used by the AArch64 target when building its pass pipeline.
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm
1312