15ffd83dbSDimitry Andric //=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // This pass does combining of machine instructions at the generic MI level, 105ffd83dbSDimitry Andric // before the legalizer. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 14fe6060f1SDimitry Andric #include "AArch64GlobalISelUtils.h" 155ffd83dbSDimitry Andric #include "AArch64TargetMachine.h" 165ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h" 175ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 185ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 195ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 205ffd83dbSDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 21fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 225ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 23fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 245ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 25fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 265ffd83dbSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 27fe6060f1SDimitry Andric #include "llvm/IR/Instructions.h" 285ffd83dbSDimitry Andric #include "llvm/Support/Debug.h" 295ffd83dbSDimitry Andric 305ffd83dbSDimitry Andric #define 
DEBUG_TYPE "aarch64-prelegalizer-combiner" 315ffd83dbSDimitry Andric 325ffd83dbSDimitry Andric using namespace llvm; 335ffd83dbSDimitry Andric using namespace MIPatternMatch; 345ffd83dbSDimitry Andric 355ffd83dbSDimitry Andric /// Return true if a G_FCONSTANT instruction is known to be better-represented 365ffd83dbSDimitry Andric /// as a G_CONSTANT. 375ffd83dbSDimitry Andric static bool matchFConstantToConstant(MachineInstr &MI, 385ffd83dbSDimitry Andric MachineRegisterInfo &MRI) { 395ffd83dbSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); 405ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 415ffd83dbSDimitry Andric const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 425ffd83dbSDimitry Andric if (DstSize != 32 && DstSize != 64) 435ffd83dbSDimitry Andric return false; 445ffd83dbSDimitry Andric 455ffd83dbSDimitry Andric // When we're storing a value, it doesn't matter what register bank it's on. 465ffd83dbSDimitry Andric // Since not all floating point constants can be materialized using a fmov, 475ffd83dbSDimitry Andric // it makes more sense to just use a GPR. 485ffd83dbSDimitry Andric return all_of(MRI.use_nodbg_instructions(DstReg), 495ffd83dbSDimitry Andric [](const MachineInstr &Use) { return Use.mayStore(); }); 505ffd83dbSDimitry Andric } 515ffd83dbSDimitry Andric 525ffd83dbSDimitry Andric /// Change a G_FCONSTANT into a G_CONSTANT. 
static void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder MIB(MI);
  // Reinterpret the FP immediate's bit pattern as an integer of the same
  // width; the value materialized into the register is bitwise-identical.
  const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
  MI.eraseFromParent();
}

/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
/// the wide value with a zero.
static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    GISelKnownBits *KB, Register &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);

  // Only equality comparisons are safe to widen this way.
  auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (!ICmpInst::isEquality(Pred))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  LLT LHSTy = MRI.getType(LHS);
  if (!LHSTy.isScalar())
    return false;

  Register RHS = MI.getOperand(3).getReg();
  Register WideReg;

  // Match (icmp eq/ne (trunc %wide), 0).
  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
      !mi_match(RHS, MRI, m_SpecificICst(0)))
    return false;

  // Require strictly more sign bits than the number of bits the truncate
  // discards, so every discarded bit is a copy of the narrow value's sign bit
  // and comparing the wide value with zero is equivalent.
  LLT WideTy = MRI.getType(WideReg);
  if (KB->computeNumSignBits(WideReg) <=
      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
    return false;

  MatchInfo = WideReg;
  return true;
}

/// Rewrite the matched G_ICMP in place to compare \p WideReg against a zero
/// of the wide type. Always succeeds (returns true).
static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    GISelChangeObserver &Observer,
                                    Register &WideReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);

  LLT WideTy = MRI.getType(WideReg);
  // We're going to directly use the wide register as the LHS, and then use an
  // equivalent size zero for RHS.
  Builder.setInstrAndDebugLoc(MI);
  auto WideZero = Builder.buildConstant(WideTy, 0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(WideReg);
  MI.getOperand(3).setReg(WideZero.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
///
/// e.g.
///
/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  MachineFunction &MF = *MI.getMF();
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return false;

  // Don't allow anything that could represent offsets etc.
  if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
    return false;

  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
  //
  // %g = G_GLOBAL_VALUE @x
  // %ptr1 = G_PTR_ADD %g, cst1
  // %ptr2 = G_PTR_ADD %g, cst2
  // ...
  // %ptrN = G_PTR_ADD %g, cstN
  //
  // Identify the *smallest* constant. We want to be able to form this:
  //
  // %offset_g = G_GLOBAL_VALUE @x + min_cst
  // %g = G_PTR_ADD %offset_g, -min_cst
  // %ptr1 = G_PTR_ADD %g, cst1
  // ...
  Register Dst = MI.getOperand(0).getReg();
  // Sentinel: all-ones so any real constant offset is smaller.
  uint64_t MinOffset = -1ull;
  for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
    if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
      return false;
    auto Cst = getIConstantVRegValWithLookThrough(
        UseInstr.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
  }

  // Require that the new offset is larger than the existing one to avoid
  // infinite loops. (This also rejects the no-user case, where MinOffset is
  // still the all-ones sentinel and the sum wraps.)
  uint64_t CurrOffset = GlobalOp.getOffset();
  uint64_t NewOffset = MinOffset + CurrOffset;
  if (NewOffset <= CurrOffset)
    return false;

  // Check whether folding this offset is legal. It must not go out of bounds of
  // the referenced object to avoid violating the code model, and must be
  // smaller than 2^21 because this is the largest offset expressible in all
  // object formats.
  //
  // This check also prevents us from folding negative offsets, which will end
  // up being treated in the same way as large positive ones. They could also
  // cause code model violations, and aren't really common enough to matter.
  if (NewOffset >= (1 << 21))
    return false;

  Type *T = GV->getValueType();
  if (!T->isSized() ||
      NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
    return false;
  // MatchInfo carries (folded offset, smallest G_PTR_ADD constant) to apply.
  MatchInfo = std::make_pair(NewOffset, MinOffset);
  return true;
}

/// Fold the matched offset into the G_GLOBAL_VALUE and compensate with a
/// G_PTR_ADD of -MinOffset so all existing users still see the old address.
static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B,
                                  GISelChangeObserver &Observer,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  // Change:
  //
  // %g = G_GLOBAL_VALUE @x
  // %ptr1 = G_PTR_ADD %g, cst1
  // %ptr2 = G_PTR_ADD %g, cst2
  // ...
  // %ptrN = G_PTR_ADD %g, cstN
  //
  // To:
  //
  // %offset_g = G_GLOBAL_VALUE @x + min_cst
  // %g = G_PTR_ADD %offset_g, -min_cst
  // %ptr1 = G_PTR_ADD %g, cst1
  // ...
  // %ptrN = G_PTR_ADD %g, cstN
  //
  // Then, the original G_PTR_ADDs should be folded later on so that they look
  // like this:
  //
  // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
  uint64_t Offset, MinOffset;
  std::tie(Offset, MinOffset) = MatchInfo;
  B.setInstrAndDebugLoc(MI);
  Observer.changingInstr(MI);
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
  // Retarget the G_GLOBAL_VALUE to a fresh vreg; the old Dst becomes the
  // result of the compensating G_PTR_ADD below.
  Register Dst = MI.getOperand(0).getReg();
  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
  MI.getOperand(0).setReg(NewGVDst);
  Observer.changedInstr(MI);
  B.buildPtrAdd(
      Dst, NewGVDst,
      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
  return true;
}

static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
                               CombinerHelper &Helper,
                               GISelChangeObserver &Observer) {
  // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
  // result is only used in the no-overflow case. It is restricted to cases
  // where we know that the high-bits of the operands are 0. If there's an
  // overflow, then the 9th or 17th bit must be set, which can be checked
  // using TBNZ.
  //
  // Change (for UADDOs on 8 and 16 bits):
  //
  // %z0 = G_ASSERT_ZEXT _
  // %op0 = G_TRUNC %z0
  // %z1 = G_ASSERT_ZEXT _
  // %op1 = G_TRUNC %z1
  // %val, %cond = G_UADDO %op0, %op1
  // G_BRCOND %cond, %error.bb
  //
  // error.bb:
  // (no successors and no uses of %val)
  //
  // To:
  //
  // %z0 = G_ASSERT_ZEXT _
  // %z1 = G_ASSERT_ZEXT _
  // %add = G_ADD %z0, %z1
  // %val = G_TRUNC %add
  // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
  // %cond = G_ICMP NE, %bit, 0
  // G_BRCOND %cond, %error.bb

  auto &MRI = *B.getMRI();

  // NOTE(review): DefOp0/DefOp1 are dereferenced below without a null check;
  // this relies on each G_UADDO operand having exactly one definition --
  // confirm getOneDef cannot return nullptr here.
  MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
  MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
  Register Op0Wide;
  Register Op1Wide;
  if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
      !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
    return false;
  LLT WideTy0 = MRI.getType(Op0Wide);
  LLT WideTy1 = MRI.getType(Op1Wide);
  Register ResVal = MI.getOperand(0).getReg();
  LLT OpTy = MRI.getType(ResVal);
  MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
  MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);

  unsigned OpTySize = OpTy.getScalarSizeInBits();
  // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
  // inputs have been zero-extended.
  if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      OpTySize != Op0WideDef->getOperand(2).getImm() ||
      OpTySize != Op1WideDef->getOperand(2).getImm())
    return false;

  // Only scalar UADDO with either 8 or 16 bit operands are handled.
  if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
      OpTySize >= WideTy0.getScalarSizeInBits() ||
      (OpTySize != 8 && OpTySize != 16))
    return false;

  // The overflow-status result must be used by a branch only.
  Register ResStatus = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(ResStatus))
    return false;
  MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
  if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Make sure the computed result is only used in the no-overflow blocks:
  // the fail block must have no successors, the branch must be in this block,
  // and no other use of the value may live in this block or the fail block.
  MachineBasicBlock *CurrentMBB = MI.getParent();
  MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
  if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
    return false;
  if (any_of(MRI.use_nodbg_instructions(ResVal),
             [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
               return &MI != &I &&
                      (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
             }))
    return false;

  // Remove G_ADDO.
  B.setInstrAndDebugLoc(*MI.getNextNode());
  MI.eraseFromParent();

  // Emit wide add.
  Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
  B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});

  // Emit check of the 9th or 17th bit and update users (the branch). This will
  // later be folded to TBNZ.
  Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
  B.buildAnd(
      CondBit, AddDst,
      B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
  B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
              B.buildConstant(LLT::scalar(32), 0));

  // Update ZEXts users of the result value. Because all uses are in the
  // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
  B.buildZExtOrTrunc(ResVal, AddDst);
  for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
    Register WideReg;
    if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
      auto OldR = U.getParent()->getOperand(0).getReg();
      Observer.erasingInstr(*U.getParent());
      U.getParent()->eraseFromParent();
      Helper.replaceRegWith(MRI, OldR, AddDst);
    }
  }

  return true;
}

/// State made available to the generated combiner rules; currently just the
/// shared CombinerHelper.
class AArch64PreLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;

public:
  AArch64PreLegalizerCombinerHelperState(CombinerHelper &Helper)
      : Helper(Helper) {}
};

#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;
  // Rule configuration parsed from the command line (enables/disables
  // individual generated combine rules).
  AArch64GenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

public:
  AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                       MachineIRBuilder &B) const override;
};

bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);

  // Run the tablegen-generated rules first; fall through to the manual
  // combines below only if none of them fired.
  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return Helper.tryCombineConcatVectors(MI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  case TargetOpcode::G_UADDO:
    return tryToSimplifyUADDO(MI, B, Helper, Observer);
  case TargetOpcode::G_MEMCPY_INLINE:
    return Helper.tryEmitMemcpyInline(MI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
    // heuristics decide.
    unsigned MaxLen = EnableOpt ? 0 : 32;
    // Try to inline memcpy type calls if optimizations are enabled.
    if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
      return true;
    if (Opc == TargetOpcode::G_MEMSET)
      return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
    return false;
  }
  }

  return false;
}

#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PreLegalizerCombiner();

  StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace

void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
    : MachineFunctionPass(ID) {
  initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  // Don't touch functions where instruction selection already failed.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto &TPC = getAnalysis<TargetPassConfig>();

  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());

  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
  AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), KB, MDT);
  Combiner C(PCInfo, &TPC);
  return C.combineMachineInstrs(MF, CSEInfo);
}

char AArch64PreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AArch64 machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AArch64 machine instrs before legalization", false,
                    false)


namespace llvm {
FunctionPass *createAArch64PreLegalizerCombiner() {
  return new AArch64PreLegalizerCombiner();
}
} // end namespace llvm