//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "aarch64-prelegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

/// Return true if a G_FCONSTANT instruction is known to be better-represented
/// as a G_CONSTANT.
static bool matchFConstantToConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  Register DstReg = MI.getOperand(0).getReg();
  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
  if (DstSize != 32 && DstSize != 64)
    return false;

  // When we're storing a value, it doesn't matter what register bank it's on.
  // Since not all floating-point constants can be materialized using an fmov,
  // it makes more sense to just use a GPR.
  return all_of(MRI.use_nodbg_instructions(DstReg),
                [](const MachineInstr &Use) { return Use.mayStore(); });
}

/// Change a G_FCONSTANT into a G_CONSTANT.
static void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder MIB(MI);
  const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
  MI.eraseFromParent();
}
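
// Illustrative only: a minimal MIR sketch of the combine implemented by the
// two functions above. The register names are made up for this comment and
// are not taken from any test:
//
//   %fv:_(s64) = G_FCONSTANT double 1.000000e+00
//   G_STORE %fv(s64), %ptr(p0) :: (store (s64))
//
// becomes
//
//   %iv:_(s64) = G_CONSTANT i64 4607182418800017408  ; 0x3FF0000000000000,
//                                                    ; the bit pattern of 1.0
//   G_STORE %iv(s64), %ptr(p0) :: (store (s64))
//
// so the stored constant can be materialized on the GPR bank without an fmov.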

/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly
/// compare the wide value with zero.
static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    GISelKnownBits *KB, Register &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);

  auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (!ICmpInst::isEquality(Pred))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  LLT LHSTy = MRI.getType(LHS);
  if (!LHSTy.isScalar())
    return false;

  Register RHS = MI.getOperand(3).getReg();
  Register WideReg;

  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
      !mi_match(RHS, MRI, m_SpecificICst(0)))
    return false;

  LLT WideTy = MRI.getType(WideReg);
  if (KB->computeNumSignBits(WideReg) <=
      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
    return false;

  MatchInfo = WideReg;
  return true;
}

static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    GISelChangeObserver &Observer,
                                    Register &WideReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);

  LLT WideTy = MRI.getType(WideReg);
  // We're going to directly use the wide register as the LHS, and then use an
  // equivalent size zero for RHS.
  Builder.setInstrAndDebugLoc(MI);
  auto WideZero = Builder.buildConstant(WideTy, 0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(WideReg);
  MI.getOperand(3).setReg(WideZero.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
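
// Illustrative only: a minimal MIR sketch of the rewrite performed by the two
// functions above, assuming known-bits proves %wide has at least 25 sign bits
// (register names are made up for this comment):
//
//   %narrow:_(s8) = G_TRUNC %wide(s32)
//   %zero:_(s8) = G_CONSTANT i8 0
//   %cmp:_(s1) = G_ICMP intpred(eq), %narrow(s8), %zero
//
// becomes
//
//   %wzero:_(s32) = G_CONSTANT i32 0
//   %cmp:_(s1) = G_ICMP intpred(eq), %wide(s32), %wzero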

/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
///
/// e.g.
///
/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  MachineFunction &MF = *MI.getMF();
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return false;

  // Don't allow anything that could represent offsets etc.
  if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
    return false;

  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Identify the *smallest* constant. We want to be able to form this:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  Register Dst = MI.getOperand(0).getReg();
  uint64_t MinOffset = -1ull;
  for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
    if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
      return false;
    auto Cst = getIConstantVRegValWithLookThrough(
        UseInstr.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
  }

  // Require that the new offset is larger than the existing one to avoid
  // infinite loops.
  uint64_t CurrOffset = GlobalOp.getOffset();
  uint64_t NewOffset = MinOffset + CurrOffset;
  if (NewOffset <= CurrOffset)
    return false;

  // Check whether folding this offset is legal. It must not go out of bounds
  // of the referenced object to avoid violating the code model, and must be
  // smaller than 2^21 because this is the largest offset expressible in all
  // object formats.
  //
  // This check also prevents us from folding negative offsets, which will end
  // up being treated in the same way as large positive ones. They could also
  // cause code model violations, and aren't really common enough to matter.
  if (NewOffset >= (1 << 21))
    return false;

  Type *T = GV->getValueType();
  if (!T->isSized() ||
      NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
    return false;
  MatchInfo = std::make_pair(NewOffset, MinOffset);
  return true;
}

static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B,
                                  GISelChangeObserver &Observer,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  // Change:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // To:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Then, the original G_PTR_ADDs should be folded later on so that they look
  // like this:
  //
  //  %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
  uint64_t Offset, MinOffset;
  std::tie(Offset, MinOffset) = MatchInfo;
  B.setInstrAndDebugLoc(MI);
  Observer.changingInstr(MI);
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
  Register Dst = MI.getOperand(0).getReg();
  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
  MI.getOperand(0).setReg(NewGVDst);
  Observer.changedInstr(MI);
  B.buildPtrAdd(
      Dst, NewGVDst,
      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
  return true;
}

static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
                               CombinerHelper &Helper,
                               GISelChangeObserver &Observer) {
  // Try to simplify a G_UADDO with 8- or 16-bit operands to a wide G_ADD plus
  // a TBNZ if the result is only used in the no-overflow case. This is
  // restricted to cases where we know that the high bits of the operands are
  // 0. If there's an overflow, then the 9th or 17th bit must be set, which can
  // be checked using TBNZ.
  //
  // Change (for UADDOs on 8 and 16 bits):
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %op0 = G_TRUNC %z0
  //   %z1 = G_ASSERT_ZEXT _
  //   %op1 = G_TRUNC %z1
  //   %val, %cond = G_UADDO %op0, %op1
  //   G_BRCOND %cond, %error.bb
  //
  // error.bb:
  //   (no successors and no uses of %val)
  //
  // To:
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %z1 = G_ASSERT_ZEXT _
  //   %add = G_ADD %z0, %z1
  //   %val = G_TRUNC %add
  //   %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
  //   %cond = G_ICMP NE, %bit, 0
  //   G_BRCOND %cond, %error.bb

  auto &MRI = *B.getMRI();

  MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
  MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
  Register Op0Wide;
  Register Op1Wide;
  if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
      !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
    return false;
  LLT WideTy0 = MRI.getType(Op0Wide);
  LLT WideTy1 = MRI.getType(Op1Wide);
  Register ResVal = MI.getOperand(0).getReg();
  LLT OpTy = MRI.getType(ResVal);
  MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
  MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);

  unsigned OpTySize = OpTy.getScalarSizeInBits();
  // First check that the G_TRUNCs feeding the G_UADDO are no-ops, because the
  // inputs have been zero-extended.
  if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      OpTySize != Op0WideDef->getOperand(2).getImm() ||
      OpTySize != Op1WideDef->getOperand(2).getImm())
    return false;

  // Only scalar UADDOs with either 8- or 16-bit operands are handled.
  if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
      OpTySize >= WideTy0.getScalarSizeInBits() ||
      (OpTySize != 8 && OpTySize != 16))
    return false;

  // The overflow-status result must be used by a branch only.
  Register ResStatus = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(ResStatus))
    return false;
  MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
  if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Make sure the computed result is only used in the no-overflow blocks.
  MachineBasicBlock *CurrentMBB = MI.getParent();
  MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
  if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
    return false;
  if (any_of(MRI.use_nodbg_instructions(ResVal),
             [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
               return &MI != &I &&
                      (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
             }))
    return false;

  // Remove the G_UADDO.
  B.setInstrAndDebugLoc(*MI.getNextNode());
  MI.eraseFromParent();

  // Emit wide add.
  Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
  B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});

  // Emit check of the 9th or 17th bit and update users (the branch). This will
  // later be folded to TBNZ.
  Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
  B.buildAnd(
      CondBit, AddDst,
      B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
  B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
              B.buildConstant(LLT::scalar(32), 0));

  // Update ZExt users of the result value. Because all uses are in the
  // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
  B.buildZExtOrTrunc(ResVal, AddDst);
  for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
    Register WideReg;
    if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
      auto OldR = U.getParent()->getOperand(0).getReg();
      Observer.erasingInstr(*U.getParent());
      U.getParent()->eraseFromParent();
      Helper.replaceRegWith(MRI, OldR, AddDst);
    }
  }

  return true;
}

class AArch64PreLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;

public:
  AArch64PreLegalizerCombinerHelperState(CombinerHelper &Helper)
      : Helper(Helper) {}
};

#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;
  AArch64GenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

public:
  AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                       MachineIRBuilder &B) const override;
};

bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return Helper.tryCombineConcatVectors(MI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  case TargetOpcode::G_UADDO:
    return tryToSimplifyUADDO(MI, B, Helper, Observer);
  case TargetOpcode::G_MEMCPY_INLINE:
    return Helper.tryEmitMemcpyInline(MI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // If we're at -O0, set a maxlen of 32 to inline; otherwise let the other
    // heuristics decide.
    unsigned MaxLen = EnableOpt ? 0 : 32;
    // Try to inline memcpy-type calls if optimizations are enabled.
    if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
      return true;
    if (Opc == TargetOpcode::G_MEMSET)
      return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
    return false;
  }
  }

  return false;
}

#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PreLegalizerCombiner();

  StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace

void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
    : MachineFunctionPass(ID) {
  initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto &TPC = getAnalysis<TargetPassConfig>();

  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());

  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
  AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), KB, MDT);
  Combiner C(PCInfo, &TPC);
  return C.combineMachineInstrs(MF, CSEInfo);
}

char AArch64PreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AArch64 machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AArch64 machine instrs before legalization", false,
                    false)
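
// A rough sketch of how the factory function below is expected to be used when
// the GlobalISel pipeline is assembled. The exact wiring and any
// optimization-level handling live in AArch64TargetMachine.cpp, not here; this
// comment is illustrative only:
//
//   void AArch64PassConfig::addPreLegalizeMachineIR() {
//     addPass(createAArch64PreLegalizerCombiner());
//   }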
namespace llvm {
FunctionPass *createAArch64PreLegalizerCombiner() {
  return new AArch64PreLegalizerCombiner();
}
} // end namespace llvm