//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
// The pass runs after the PrologEpilogInserter where we emit the CFI
// instructions. In order to preserve the correctness of the unwind information,
// the pass should not change the order of any two instructions, one of which
// has the FrameSetup/FrameDestroy flag or, alternatively, apply an ad hoc fix
// to unwind information.
1781ad6265SDimitry Andric // 180b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 21fe6060f1SDimitry Andric #include "AArch64MachineFunctionInfo.h" 220b57cec5SDimitry Andric #include "AArch64Subtarget.h" 230b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 240b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 250b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 260b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 270b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h" 280b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 290b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 300b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 310b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 320b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 330b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 35480093f4SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 360b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 370b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 385ffd83dbSDimitry Andric #include "llvm/MC/MCAsmInfo.h" 3981ad6265SDimitry Andric #include "llvm/MC/MCDwarf.h" 400b57cec5SDimitry Andric #include "llvm/MC/MCRegisterInfo.h" 410b57cec5SDimitry Andric #include "llvm/Pass.h" 420b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 430b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 44480093f4SDimitry Andric #include "llvm/Support/DebugCounter.h" 450b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 460b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 470b57cec5SDimitry Andric #include <cassert> 480b57cec5SDimitry Andric #include <cstdint> 49480093f4SDimitry Andric #include 
<functional> 500b57cec5SDimitry Andric #include <iterator> 510b57cec5SDimitry Andric #include <limits> 52bdd1243dSDimitry Andric #include <optional> 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric using namespace llvm; 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-ldst-opt" 570b57cec5SDimitry Andric 580b57cec5SDimitry Andric STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); 590b57cec5SDimitry Andric STATISTIC(NumPostFolded, "Number of post-index updates folded"); 600b57cec5SDimitry Andric STATISTIC(NumPreFolded, "Number of pre-index updates folded"); 610b57cec5SDimitry Andric STATISTIC(NumUnscaledPairCreated, 620b57cec5SDimitry Andric "Number of load/store from unscaled generated"); 630b57cec5SDimitry Andric STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); 640b57cec5SDimitry Andric STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); 65*0fca6ea1SDimitry Andric STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation " 66*0fca6ea1SDimitry Andric "not passed the alignment check"); 670b57cec5SDimitry Andric 68480093f4SDimitry Andric DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", 69480093f4SDimitry Andric "Controls which pairs are considered for renaming"); 70480093f4SDimitry Andric 710b57cec5SDimitry Andric // The LdStLimit limits how far we search for load/store pairs. 720b57cec5SDimitry Andric static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", 730b57cec5SDimitry Andric cl::init(20), cl::Hidden); 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric // The UpdateLimit limits how far we search for update instructions when we form 760b57cec5SDimitry Andric // pre-/post-index instructions. 
770b57cec5SDimitry Andric static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), 780b57cec5SDimitry Andric cl::Hidden); 790b57cec5SDimitry Andric 8013138422SDimitry Andric // Enable register renaming to find additional store pairing opportunities. 8113138422SDimitry Andric static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming", 825ffd83dbSDimitry Andric cl::init(true), cl::Hidden); 8313138422SDimitry Andric 840b57cec5SDimitry Andric #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric namespace { 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric using LdStPairFlags = struct LdStPairFlags { 890b57cec5SDimitry Andric // If a matching instruction is found, MergeForward is set to true if the 900b57cec5SDimitry Andric // merge is to remove the first instruction and replace the second with 910b57cec5SDimitry Andric // a pair-wise insn, and false if the reverse is true. 920b57cec5SDimitry Andric bool MergeForward = false; 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric // SExtIdx gives the index of the result of the load pair that must be 950b57cec5SDimitry Andric // extended. The value of SExtIdx assumes that the paired load produces the 960b57cec5SDimitry Andric // value in this order: (I, returned iterator), i.e., -1 means no value has 970b57cec5SDimitry Andric // to be extended, 0 means I, and 1 means the returned iterator. 980b57cec5SDimitry Andric int SExtIdx = -1; 990b57cec5SDimitry Andric 100480093f4SDimitry Andric // If not none, RenameReg can be used to rename the result register of the 101480093f4SDimitry Andric // first store in a pair. Currently this only works when merging stores 102480093f4SDimitry Andric // forward. 
103bdd1243dSDimitry Andric std::optional<MCPhysReg> RenameReg; 104480093f4SDimitry Andric 1050b57cec5SDimitry Andric LdStPairFlags() = default; 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric void setMergeForward(bool V = true) { MergeForward = V; } 1080b57cec5SDimitry Andric bool getMergeForward() const { return MergeForward; } 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric void setSExtIdx(int V) { SExtIdx = V; } 1110b57cec5SDimitry Andric int getSExtIdx() const { return SExtIdx; } 112480093f4SDimitry Andric 113480093f4SDimitry Andric void setRenameReg(MCPhysReg R) { RenameReg = R; } 114bdd1243dSDimitry Andric void clearRenameReg() { RenameReg = std::nullopt; } 115bdd1243dSDimitry Andric std::optional<MCPhysReg> getRenameReg() const { return RenameReg; } 1160b57cec5SDimitry Andric }; 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric struct AArch64LoadStoreOpt : public MachineFunctionPass { 1190b57cec5SDimitry Andric static char ID; 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric AArch64LoadStoreOpt() : MachineFunctionPass(ID) { 1220b57cec5SDimitry Andric initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric AliasAnalysis *AA; 1260b57cec5SDimitry Andric const AArch64InstrInfo *TII; 1270b57cec5SDimitry Andric const TargetRegisterInfo *TRI; 1280b57cec5SDimitry Andric const AArch64Subtarget *Subtarget; 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric // Track which register units have been modified and used. 
1310b57cec5SDimitry Andric LiveRegUnits ModifiedRegUnits, UsedRegUnits; 132480093f4SDimitry Andric LiveRegUnits DefinedInBB; 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1350b57cec5SDimitry Andric AU.addRequired<AAResultsWrapperPass>(); 1360b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric // Scan the instructions looking for a load/store that can be combined 1400b57cec5SDimitry Andric // with the current instruction into a load/store pair. 1410b57cec5SDimitry Andric // Return the matching instruction if one is found, else MBB->end(). 1420b57cec5SDimitry Andric MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, 1430b57cec5SDimitry Andric LdStPairFlags &Flags, 1440b57cec5SDimitry Andric unsigned Limit, 1450b57cec5SDimitry Andric bool FindNarrowMerge); 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric // Scan the instructions looking for a store that writes to the address from 1480b57cec5SDimitry Andric // which the current load instruction reads. Return true if one is found. 1490b57cec5SDimitry Andric bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, 1500b57cec5SDimitry Andric MachineBasicBlock::iterator &StoreI); 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric // Merge the two instructions indicated into a wider narrow store instruction. 1530b57cec5SDimitry Andric MachineBasicBlock::iterator 1540b57cec5SDimitry Andric mergeNarrowZeroStores(MachineBasicBlock::iterator I, 1550b57cec5SDimitry Andric MachineBasicBlock::iterator MergeMI, 1560b57cec5SDimitry Andric const LdStPairFlags &Flags); 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric // Merge the two instructions indicated into a single pair-wise instruction. 
1590b57cec5SDimitry Andric MachineBasicBlock::iterator 1600b57cec5SDimitry Andric mergePairedInsns(MachineBasicBlock::iterator I, 1610b57cec5SDimitry Andric MachineBasicBlock::iterator Paired, 1620b57cec5SDimitry Andric const LdStPairFlags &Flags); 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric // Promote the load that reads directly from the address stored to. 1650b57cec5SDimitry Andric MachineBasicBlock::iterator 1660b57cec5SDimitry Andric promoteLoadFromStore(MachineBasicBlock::iterator LoadI, 1670b57cec5SDimitry Andric MachineBasicBlock::iterator StoreI); 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric // Scan the instruction list to find a base register update that can 1700b57cec5SDimitry Andric // be combined with the current instruction (a load or store) using 1710b57cec5SDimitry Andric // pre or post indexed addressing with writeback. Scan forwards. 1720b57cec5SDimitry Andric MachineBasicBlock::iterator 1730b57cec5SDimitry Andric findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, 1740b57cec5SDimitry Andric int UnscaledOffset, unsigned Limit); 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric // Scan the instruction list to find a base register update that can 1770b57cec5SDimitry Andric // be combined with the current instruction (a load or store) using 1780b57cec5SDimitry Andric // pre or post indexed addressing with writeback. Scan backwards. 1790b57cec5SDimitry Andric MachineBasicBlock::iterator 1800b57cec5SDimitry Andric findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric // Find an instruction that updates the base register of the ld/st 1830b57cec5SDimitry Andric // instruction. 
1840b57cec5SDimitry Andric bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, 1850b57cec5SDimitry Andric unsigned BaseReg, int Offset); 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric // Merge a pre- or post-index base register update into a ld/st instruction. 1880b57cec5SDimitry Andric MachineBasicBlock::iterator 1890b57cec5SDimitry Andric mergeUpdateInsn(MachineBasicBlock::iterator I, 1900b57cec5SDimitry Andric MachineBasicBlock::iterator Update, bool IsPreIdx); 1910b57cec5SDimitry Andric 1920b57cec5SDimitry Andric // Find and merge zero store instructions. 1930b57cec5SDimitry Andric bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric // Find and pair ldr/str instructions. 1960b57cec5SDimitry Andric bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric // Find and promote load instructions which read directly from store. 1990b57cec5SDimitry Andric bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric // Find and merge a base register updates before or after a ld/st instruction. 
2020b57cec5SDimitry Andric bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI); 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override; 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 2090b57cec5SDimitry Andric return MachineFunctionProperties().set( 2100b57cec5SDimitry Andric MachineFunctionProperties::Property::NoVRegs); 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; } 2140b57cec5SDimitry Andric }; 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric char AArch64LoadStoreOpt::ID = 0; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric } // end anonymous namespace 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", 2210b57cec5SDimitry Andric AARCH64_LOAD_STORE_OPT_NAME, false, false) 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric static bool isNarrowStore(unsigned Opc) { 2240b57cec5SDimitry Andric switch (Opc) { 2250b57cec5SDimitry Andric default: 2260b57cec5SDimitry Andric return false; 2270b57cec5SDimitry Andric case AArch64::STRBBui: 2280b57cec5SDimitry Andric case AArch64::STURBBi: 2290b57cec5SDimitry Andric case AArch64::STRHHui: 2300b57cec5SDimitry Andric case AArch64::STURHHi: 2310b57cec5SDimitry Andric return true; 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 2358bcb0991SDimitry Andric // These instruction set memory tag and either keep memory contents unchanged or 2368bcb0991SDimitry Andric // set it to zero, ignoring the address part of the source register. 
2378bcb0991SDimitry Andric static bool isTagStore(const MachineInstr &MI) { 2388bcb0991SDimitry Andric switch (MI.getOpcode()) { 2398bcb0991SDimitry Andric default: 2408bcb0991SDimitry Andric return false; 24106c3fb27SDimitry Andric case AArch64::STGi: 24206c3fb27SDimitry Andric case AArch64::STZGi: 24306c3fb27SDimitry Andric case AArch64::ST2Gi: 24406c3fb27SDimitry Andric case AArch64::STZ2Gi: 2458bcb0991SDimitry Andric return true; 2468bcb0991SDimitry Andric } 2478bcb0991SDimitry Andric } 2488bcb0991SDimitry Andric 2490b57cec5SDimitry Andric static unsigned getMatchingNonSExtOpcode(unsigned Opc, 2500b57cec5SDimitry Andric bool *IsValidLdStrOpc = nullptr) { 2510b57cec5SDimitry Andric if (IsValidLdStrOpc) 2520b57cec5SDimitry Andric *IsValidLdStrOpc = true; 2530b57cec5SDimitry Andric switch (Opc) { 2540b57cec5SDimitry Andric default: 2550b57cec5SDimitry Andric if (IsValidLdStrOpc) 2560b57cec5SDimitry Andric *IsValidLdStrOpc = false; 2570b57cec5SDimitry Andric return std::numeric_limits<unsigned>::max(); 2580b57cec5SDimitry Andric case AArch64::STRDui: 2590b57cec5SDimitry Andric case AArch64::STURDi: 260fe6060f1SDimitry Andric case AArch64::STRDpre: 2610b57cec5SDimitry Andric case AArch64::STRQui: 2620b57cec5SDimitry Andric case AArch64::STURQi: 263fe6060f1SDimitry Andric case AArch64::STRQpre: 2640b57cec5SDimitry Andric case AArch64::STRBBui: 2650b57cec5SDimitry Andric case AArch64::STURBBi: 2660b57cec5SDimitry Andric case AArch64::STRHHui: 2670b57cec5SDimitry Andric case AArch64::STURHHi: 2680b57cec5SDimitry Andric case AArch64::STRWui: 269fe6060f1SDimitry Andric case AArch64::STRWpre: 2700b57cec5SDimitry Andric case AArch64::STURWi: 2710b57cec5SDimitry Andric case AArch64::STRXui: 272fe6060f1SDimitry Andric case AArch64::STRXpre: 2730b57cec5SDimitry Andric case AArch64::STURXi: 2740b57cec5SDimitry Andric case AArch64::LDRDui: 2750b57cec5SDimitry Andric case AArch64::LDURDi: 276fe6060f1SDimitry Andric case AArch64::LDRDpre: 2770b57cec5SDimitry Andric case 
AArch64::LDRQui: 2780b57cec5SDimitry Andric case AArch64::LDURQi: 279fe6060f1SDimitry Andric case AArch64::LDRQpre: 2800b57cec5SDimitry Andric case AArch64::LDRWui: 2810b57cec5SDimitry Andric case AArch64::LDURWi: 282fe6060f1SDimitry Andric case AArch64::LDRWpre: 2830b57cec5SDimitry Andric case AArch64::LDRXui: 2840b57cec5SDimitry Andric case AArch64::LDURXi: 285fe6060f1SDimitry Andric case AArch64::LDRXpre: 2860b57cec5SDimitry Andric case AArch64::STRSui: 2870b57cec5SDimitry Andric case AArch64::STURSi: 288fe6060f1SDimitry Andric case AArch64::STRSpre: 2890b57cec5SDimitry Andric case AArch64::LDRSui: 2900b57cec5SDimitry Andric case AArch64::LDURSi: 291fe6060f1SDimitry Andric case AArch64::LDRSpre: 2920b57cec5SDimitry Andric return Opc; 2930b57cec5SDimitry Andric case AArch64::LDRSWui: 2940b57cec5SDimitry Andric return AArch64::LDRWui; 2950b57cec5SDimitry Andric case AArch64::LDURSWi: 2960b57cec5SDimitry Andric return AArch64::LDURWi; 2975f757f3fSDimitry Andric case AArch64::LDRSWpre: 2985f757f3fSDimitry Andric return AArch64::LDRWpre; 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric static unsigned getMatchingWideOpcode(unsigned Opc) { 3030b57cec5SDimitry Andric switch (Opc) { 3040b57cec5SDimitry Andric default: 3050b57cec5SDimitry Andric llvm_unreachable("Opcode has no wide equivalent!"); 3060b57cec5SDimitry Andric case AArch64::STRBBui: 3070b57cec5SDimitry Andric return AArch64::STRHHui; 3080b57cec5SDimitry Andric case AArch64::STRHHui: 3090b57cec5SDimitry Andric return AArch64::STRWui; 3100b57cec5SDimitry Andric case AArch64::STURBBi: 3110b57cec5SDimitry Andric return AArch64::STURHHi; 3120b57cec5SDimitry Andric case AArch64::STURHHi: 3130b57cec5SDimitry Andric return AArch64::STURWi; 3140b57cec5SDimitry Andric case AArch64::STURWi: 3150b57cec5SDimitry Andric return AArch64::STURXi; 3160b57cec5SDimitry Andric case AArch64::STRWui: 3170b57cec5SDimitry Andric return AArch64::STRXui; 
3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric } 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric static unsigned getMatchingPairOpcode(unsigned Opc) { 3220b57cec5SDimitry Andric switch (Opc) { 3230b57cec5SDimitry Andric default: 3240b57cec5SDimitry Andric llvm_unreachable("Opcode has no pairwise equivalent!"); 3250b57cec5SDimitry Andric case AArch64::STRSui: 3260b57cec5SDimitry Andric case AArch64::STURSi: 3270b57cec5SDimitry Andric return AArch64::STPSi; 328fe6060f1SDimitry Andric case AArch64::STRSpre: 329fe6060f1SDimitry Andric return AArch64::STPSpre; 3300b57cec5SDimitry Andric case AArch64::STRDui: 3310b57cec5SDimitry Andric case AArch64::STURDi: 3320b57cec5SDimitry Andric return AArch64::STPDi; 333fe6060f1SDimitry Andric case AArch64::STRDpre: 334fe6060f1SDimitry Andric return AArch64::STPDpre; 3350b57cec5SDimitry Andric case AArch64::STRQui: 3360b57cec5SDimitry Andric case AArch64::STURQi: 3370b57cec5SDimitry Andric return AArch64::STPQi; 338fe6060f1SDimitry Andric case AArch64::STRQpre: 339fe6060f1SDimitry Andric return AArch64::STPQpre; 3400b57cec5SDimitry Andric case AArch64::STRWui: 3410b57cec5SDimitry Andric case AArch64::STURWi: 3420b57cec5SDimitry Andric return AArch64::STPWi; 343fe6060f1SDimitry Andric case AArch64::STRWpre: 344fe6060f1SDimitry Andric return AArch64::STPWpre; 3450b57cec5SDimitry Andric case AArch64::STRXui: 3460b57cec5SDimitry Andric case AArch64::STURXi: 3470b57cec5SDimitry Andric return AArch64::STPXi; 348fe6060f1SDimitry Andric case AArch64::STRXpre: 349fe6060f1SDimitry Andric return AArch64::STPXpre; 3500b57cec5SDimitry Andric case AArch64::LDRSui: 3510b57cec5SDimitry Andric case AArch64::LDURSi: 3520b57cec5SDimitry Andric return AArch64::LDPSi; 353fe6060f1SDimitry Andric case AArch64::LDRSpre: 354fe6060f1SDimitry Andric return AArch64::LDPSpre; 3550b57cec5SDimitry Andric case AArch64::LDRDui: 3560b57cec5SDimitry Andric case AArch64::LDURDi: 3570b57cec5SDimitry Andric return AArch64::LDPDi; 358fe6060f1SDimitry 
Andric case AArch64::LDRDpre: 359fe6060f1SDimitry Andric return AArch64::LDPDpre; 3600b57cec5SDimitry Andric case AArch64::LDRQui: 3610b57cec5SDimitry Andric case AArch64::LDURQi: 3620b57cec5SDimitry Andric return AArch64::LDPQi; 363fe6060f1SDimitry Andric case AArch64::LDRQpre: 364fe6060f1SDimitry Andric return AArch64::LDPQpre; 3650b57cec5SDimitry Andric case AArch64::LDRWui: 3660b57cec5SDimitry Andric case AArch64::LDURWi: 3670b57cec5SDimitry Andric return AArch64::LDPWi; 368fe6060f1SDimitry Andric case AArch64::LDRWpre: 369fe6060f1SDimitry Andric return AArch64::LDPWpre; 3700b57cec5SDimitry Andric case AArch64::LDRXui: 3710b57cec5SDimitry Andric case AArch64::LDURXi: 3720b57cec5SDimitry Andric return AArch64::LDPXi; 373fe6060f1SDimitry Andric case AArch64::LDRXpre: 374fe6060f1SDimitry Andric return AArch64::LDPXpre; 3750b57cec5SDimitry Andric case AArch64::LDRSWui: 3760b57cec5SDimitry Andric case AArch64::LDURSWi: 3770b57cec5SDimitry Andric return AArch64::LDPSWi; 3785f757f3fSDimitry Andric case AArch64::LDRSWpre: 3795f757f3fSDimitry Andric return AArch64::LDPSWpre; 3800b57cec5SDimitry Andric } 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric static unsigned isMatchingStore(MachineInstr &LoadInst, 3840b57cec5SDimitry Andric MachineInstr &StoreInst) { 3850b57cec5SDimitry Andric unsigned LdOpc = LoadInst.getOpcode(); 3860b57cec5SDimitry Andric unsigned StOpc = StoreInst.getOpcode(); 3870b57cec5SDimitry Andric switch (LdOpc) { 3880b57cec5SDimitry Andric default: 3890b57cec5SDimitry Andric llvm_unreachable("Unsupported load instruction!"); 3900b57cec5SDimitry Andric case AArch64::LDRBBui: 3910b57cec5SDimitry Andric return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || 3920b57cec5SDimitry Andric StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; 3930b57cec5SDimitry Andric case AArch64::LDURBBi: 3940b57cec5SDimitry Andric return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || 3950b57cec5SDimitry Andric StOpc 
== AArch64::STURWi || StOpc == AArch64::STURXi; 3960b57cec5SDimitry Andric case AArch64::LDRHHui: 3970b57cec5SDimitry Andric return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || 3980b57cec5SDimitry Andric StOpc == AArch64::STRXui; 3990b57cec5SDimitry Andric case AArch64::LDURHHi: 4000b57cec5SDimitry Andric return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || 4010b57cec5SDimitry Andric StOpc == AArch64::STURXi; 4020b57cec5SDimitry Andric case AArch64::LDRWui: 4030b57cec5SDimitry Andric return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; 4040b57cec5SDimitry Andric case AArch64::LDURWi: 4050b57cec5SDimitry Andric return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; 4060b57cec5SDimitry Andric case AArch64::LDRXui: 4070b57cec5SDimitry Andric return StOpc == AArch64::STRXui; 4080b57cec5SDimitry Andric case AArch64::LDURXi: 4090b57cec5SDimitry Andric return StOpc == AArch64::STURXi; 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric } 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric static unsigned getPreIndexedOpcode(unsigned Opc) { 4140b57cec5SDimitry Andric // FIXME: We don't currently support creating pre-indexed loads/stores when 4150b57cec5SDimitry Andric // the load or store is the unscaled version. If we decide to perform such an 4160b57cec5SDimitry Andric // optimization in the future the cases for the unscaled loads/stores will 4170b57cec5SDimitry Andric // need to be added here. 
4180b57cec5SDimitry Andric switch (Opc) { 4190b57cec5SDimitry Andric default: 4200b57cec5SDimitry Andric llvm_unreachable("Opcode has no pre-indexed equivalent!"); 4210b57cec5SDimitry Andric case AArch64::STRSui: 4220b57cec5SDimitry Andric return AArch64::STRSpre; 4230b57cec5SDimitry Andric case AArch64::STRDui: 4240b57cec5SDimitry Andric return AArch64::STRDpre; 4250b57cec5SDimitry Andric case AArch64::STRQui: 4260b57cec5SDimitry Andric return AArch64::STRQpre; 4270b57cec5SDimitry Andric case AArch64::STRBBui: 4280b57cec5SDimitry Andric return AArch64::STRBBpre; 4290b57cec5SDimitry Andric case AArch64::STRHHui: 4300b57cec5SDimitry Andric return AArch64::STRHHpre; 4310b57cec5SDimitry Andric case AArch64::STRWui: 4320b57cec5SDimitry Andric return AArch64::STRWpre; 4330b57cec5SDimitry Andric case AArch64::STRXui: 4340b57cec5SDimitry Andric return AArch64::STRXpre; 4350b57cec5SDimitry Andric case AArch64::LDRSui: 4360b57cec5SDimitry Andric return AArch64::LDRSpre; 4370b57cec5SDimitry Andric case AArch64::LDRDui: 4380b57cec5SDimitry Andric return AArch64::LDRDpre; 4390b57cec5SDimitry Andric case AArch64::LDRQui: 4400b57cec5SDimitry Andric return AArch64::LDRQpre; 4410b57cec5SDimitry Andric case AArch64::LDRBBui: 4420b57cec5SDimitry Andric return AArch64::LDRBBpre; 4430b57cec5SDimitry Andric case AArch64::LDRHHui: 4440b57cec5SDimitry Andric return AArch64::LDRHHpre; 4450b57cec5SDimitry Andric case AArch64::LDRWui: 4460b57cec5SDimitry Andric return AArch64::LDRWpre; 4470b57cec5SDimitry Andric case AArch64::LDRXui: 4480b57cec5SDimitry Andric return AArch64::LDRXpre; 4490b57cec5SDimitry Andric case AArch64::LDRSWui: 4500b57cec5SDimitry Andric return AArch64::LDRSWpre; 4510b57cec5SDimitry Andric case AArch64::LDPSi: 4520b57cec5SDimitry Andric return AArch64::LDPSpre; 4530b57cec5SDimitry Andric case AArch64::LDPSWi: 4540b57cec5SDimitry Andric return AArch64::LDPSWpre; 4550b57cec5SDimitry Andric case AArch64::LDPDi: 4560b57cec5SDimitry Andric return AArch64::LDPDpre; 
4570b57cec5SDimitry Andric case AArch64::LDPQi: 4580b57cec5SDimitry Andric return AArch64::LDPQpre; 4590b57cec5SDimitry Andric case AArch64::LDPWi: 4600b57cec5SDimitry Andric return AArch64::LDPWpre; 4610b57cec5SDimitry Andric case AArch64::LDPXi: 4620b57cec5SDimitry Andric return AArch64::LDPXpre; 4630b57cec5SDimitry Andric case AArch64::STPSi: 4640b57cec5SDimitry Andric return AArch64::STPSpre; 4650b57cec5SDimitry Andric case AArch64::STPDi: 4660b57cec5SDimitry Andric return AArch64::STPDpre; 4670b57cec5SDimitry Andric case AArch64::STPQi: 4680b57cec5SDimitry Andric return AArch64::STPQpre; 4690b57cec5SDimitry Andric case AArch64::STPWi: 4700b57cec5SDimitry Andric return AArch64::STPWpre; 4710b57cec5SDimitry Andric case AArch64::STPXi: 4720b57cec5SDimitry Andric return AArch64::STPXpre; 47306c3fb27SDimitry Andric case AArch64::STGi: 4748bcb0991SDimitry Andric return AArch64::STGPreIndex; 47506c3fb27SDimitry Andric case AArch64::STZGi: 4768bcb0991SDimitry Andric return AArch64::STZGPreIndex; 47706c3fb27SDimitry Andric case AArch64::ST2Gi: 4788bcb0991SDimitry Andric return AArch64::ST2GPreIndex; 47906c3fb27SDimitry Andric case AArch64::STZ2Gi: 4808bcb0991SDimitry Andric return AArch64::STZ2GPreIndex; 4818bcb0991SDimitry Andric case AArch64::STGPi: 4828bcb0991SDimitry Andric return AArch64::STGPpre; 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric 4860b57cec5SDimitry Andric static unsigned getPostIndexedOpcode(unsigned Opc) { 4870b57cec5SDimitry Andric switch (Opc) { 4880b57cec5SDimitry Andric default: 4890b57cec5SDimitry Andric llvm_unreachable("Opcode has no post-indexed wise equivalent!"); 4900b57cec5SDimitry Andric case AArch64::STRSui: 4910b57cec5SDimitry Andric case AArch64::STURSi: 4920b57cec5SDimitry Andric return AArch64::STRSpost; 4930b57cec5SDimitry Andric case AArch64::STRDui: 4940b57cec5SDimitry Andric case AArch64::STURDi: 4950b57cec5SDimitry Andric return AArch64::STRDpost; 4960b57cec5SDimitry Andric case 
AArch64::STRQui: 4970b57cec5SDimitry Andric case AArch64::STURQi: 4980b57cec5SDimitry Andric return AArch64::STRQpost; 4990b57cec5SDimitry Andric case AArch64::STRBBui: 5000b57cec5SDimitry Andric return AArch64::STRBBpost; 5010b57cec5SDimitry Andric case AArch64::STRHHui: 5020b57cec5SDimitry Andric return AArch64::STRHHpost; 5030b57cec5SDimitry Andric case AArch64::STRWui: 5040b57cec5SDimitry Andric case AArch64::STURWi: 5050b57cec5SDimitry Andric return AArch64::STRWpost; 5060b57cec5SDimitry Andric case AArch64::STRXui: 5070b57cec5SDimitry Andric case AArch64::STURXi: 5080b57cec5SDimitry Andric return AArch64::STRXpost; 5090b57cec5SDimitry Andric case AArch64::LDRSui: 5100b57cec5SDimitry Andric case AArch64::LDURSi: 5110b57cec5SDimitry Andric return AArch64::LDRSpost; 5120b57cec5SDimitry Andric case AArch64::LDRDui: 5130b57cec5SDimitry Andric case AArch64::LDURDi: 5140b57cec5SDimitry Andric return AArch64::LDRDpost; 5150b57cec5SDimitry Andric case AArch64::LDRQui: 5160b57cec5SDimitry Andric case AArch64::LDURQi: 5170b57cec5SDimitry Andric return AArch64::LDRQpost; 5180b57cec5SDimitry Andric case AArch64::LDRBBui: 5190b57cec5SDimitry Andric return AArch64::LDRBBpost; 5200b57cec5SDimitry Andric case AArch64::LDRHHui: 5210b57cec5SDimitry Andric return AArch64::LDRHHpost; 5220b57cec5SDimitry Andric case AArch64::LDRWui: 5230b57cec5SDimitry Andric case AArch64::LDURWi: 5240b57cec5SDimitry Andric return AArch64::LDRWpost; 5250b57cec5SDimitry Andric case AArch64::LDRXui: 5260b57cec5SDimitry Andric case AArch64::LDURXi: 5270b57cec5SDimitry Andric return AArch64::LDRXpost; 5280b57cec5SDimitry Andric case AArch64::LDRSWui: 5290b57cec5SDimitry Andric return AArch64::LDRSWpost; 5300b57cec5SDimitry Andric case AArch64::LDPSi: 5310b57cec5SDimitry Andric return AArch64::LDPSpost; 5320b57cec5SDimitry Andric case AArch64::LDPSWi: 5330b57cec5SDimitry Andric return AArch64::LDPSWpost; 5340b57cec5SDimitry Andric case AArch64::LDPDi: 5350b57cec5SDimitry Andric return 
AArch64::LDPDpost; 5360b57cec5SDimitry Andric case AArch64::LDPQi: 5370b57cec5SDimitry Andric return AArch64::LDPQpost; 5380b57cec5SDimitry Andric case AArch64::LDPWi: 5390b57cec5SDimitry Andric return AArch64::LDPWpost; 5400b57cec5SDimitry Andric case AArch64::LDPXi: 5410b57cec5SDimitry Andric return AArch64::LDPXpost; 5420b57cec5SDimitry Andric case AArch64::STPSi: 5430b57cec5SDimitry Andric return AArch64::STPSpost; 5440b57cec5SDimitry Andric case AArch64::STPDi: 5450b57cec5SDimitry Andric return AArch64::STPDpost; 5460b57cec5SDimitry Andric case AArch64::STPQi: 5470b57cec5SDimitry Andric return AArch64::STPQpost; 5480b57cec5SDimitry Andric case AArch64::STPWi: 5490b57cec5SDimitry Andric return AArch64::STPWpost; 5500b57cec5SDimitry Andric case AArch64::STPXi: 5510b57cec5SDimitry Andric return AArch64::STPXpost; 55206c3fb27SDimitry Andric case AArch64::STGi: 5538bcb0991SDimitry Andric return AArch64::STGPostIndex; 55406c3fb27SDimitry Andric case AArch64::STZGi: 5558bcb0991SDimitry Andric return AArch64::STZGPostIndex; 55606c3fb27SDimitry Andric case AArch64::ST2Gi: 5578bcb0991SDimitry Andric return AArch64::ST2GPostIndex; 55806c3fb27SDimitry Andric case AArch64::STZ2Gi: 5598bcb0991SDimitry Andric return AArch64::STZ2GPostIndex; 5608bcb0991SDimitry Andric case AArch64::STGPi: 5618bcb0991SDimitry Andric return AArch64::STGPpost; 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric 565fe6060f1SDimitry Andric static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) { 566fe6060f1SDimitry Andric 567fe6060f1SDimitry Andric unsigned OpcA = FirstMI.getOpcode(); 568fe6060f1SDimitry Andric unsigned OpcB = MI.getOpcode(); 569fe6060f1SDimitry Andric 570fe6060f1SDimitry Andric switch (OpcA) { 571fe6060f1SDimitry Andric default: 572fe6060f1SDimitry Andric return false; 573fe6060f1SDimitry Andric case AArch64::STRSpre: 574fe6060f1SDimitry Andric return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi); 575fe6060f1SDimitry 
Andric case AArch64::STRDpre: 576fe6060f1SDimitry Andric return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi); 577fe6060f1SDimitry Andric case AArch64::STRQpre: 578fe6060f1SDimitry Andric return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi); 579fe6060f1SDimitry Andric case AArch64::STRWpre: 580fe6060f1SDimitry Andric return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi); 581fe6060f1SDimitry Andric case AArch64::STRXpre: 582fe6060f1SDimitry Andric return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi); 583fe6060f1SDimitry Andric case AArch64::LDRSpre: 584fe6060f1SDimitry Andric return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi); 585fe6060f1SDimitry Andric case AArch64::LDRDpre: 586fe6060f1SDimitry Andric return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi); 587fe6060f1SDimitry Andric case AArch64::LDRQpre: 588fe6060f1SDimitry Andric return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi); 589fe6060f1SDimitry Andric case AArch64::LDRWpre: 590fe6060f1SDimitry Andric return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi); 591fe6060f1SDimitry Andric case AArch64::LDRXpre: 592fe6060f1SDimitry Andric return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi); 5935f757f3fSDimitry Andric case AArch64::LDRSWpre: 5945f757f3fSDimitry Andric return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi); 595fe6060f1SDimitry Andric } 596fe6060f1SDimitry Andric } 597fe6060f1SDimitry Andric 5988bcb0991SDimitry Andric // Returns the scale and offset range of pre/post indexed variants of MI. 
5998bcb0991SDimitry Andric static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, 6008bcb0991SDimitry Andric int &MinOffset, int &MaxOffset) { 60181ad6265SDimitry Andric bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI); 6028bcb0991SDimitry Andric bool IsTagStore = isTagStore(MI); 6038bcb0991SDimitry Andric // ST*G and all paired ldst have the same scale in pre/post-indexed variants 6048bcb0991SDimitry Andric // as in the "unsigned offset" variant. 6058bcb0991SDimitry Andric // All other pre/post indexed ldst instructions are unscaled. 606480093f4SDimitry Andric Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1; 6078bcb0991SDimitry Andric 6088bcb0991SDimitry Andric if (IsPaired) { 6098bcb0991SDimitry Andric MinOffset = -64; 6108bcb0991SDimitry Andric MaxOffset = 63; 6118bcb0991SDimitry Andric } else { 6128bcb0991SDimitry Andric MinOffset = -256; 6138bcb0991SDimitry Andric MaxOffset = 255; 6148bcb0991SDimitry Andric } 6158bcb0991SDimitry Andric } 6168bcb0991SDimitry Andric 617480093f4SDimitry Andric static MachineOperand &getLdStRegOp(MachineInstr &MI, 6180b57cec5SDimitry Andric unsigned PairedRegOp = 0) { 6190b57cec5SDimitry Andric assert(PairedRegOp < 2 && "Unexpected register operand idx."); 620fe6060f1SDimitry Andric bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI); 621fe6060f1SDimitry Andric if (IsPreLdSt) 622fe6060f1SDimitry Andric PairedRegOp += 1; 62381ad6265SDimitry Andric unsigned Idx = 62481ad6265SDimitry Andric AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? 
PairedRegOp : 0; 6250b57cec5SDimitry Andric return MI.getOperand(Idx); 6260b57cec5SDimitry Andric } 6270b57cec5SDimitry Andric 6280b57cec5SDimitry Andric static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, 6290b57cec5SDimitry Andric MachineInstr &StoreInst, 6300b57cec5SDimitry Andric const AArch64InstrInfo *TII) { 6310b57cec5SDimitry Andric assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); 632480093f4SDimitry Andric int LoadSize = TII->getMemScale(LoadInst); 633480093f4SDimitry Andric int StoreSize = TII->getMemScale(StoreInst); 63481ad6265SDimitry Andric int UnscaledStOffset = 63581ad6265SDimitry Andric TII->hasUnscaledLdStOffset(StoreInst) 63681ad6265SDimitry Andric ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() 63781ad6265SDimitry Andric : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize; 63881ad6265SDimitry Andric int UnscaledLdOffset = 63981ad6265SDimitry Andric TII->hasUnscaledLdStOffset(LoadInst) 64081ad6265SDimitry Andric ? 
AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() 64181ad6265SDimitry Andric : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize; 6420b57cec5SDimitry Andric return (UnscaledStOffset <= UnscaledLdOffset) && 6430b57cec5SDimitry Andric (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); 6440b57cec5SDimitry Andric } 6450b57cec5SDimitry Andric 6460b57cec5SDimitry Andric static bool isPromotableZeroStoreInst(MachineInstr &MI) { 6470b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 6480b57cec5SDimitry Andric return (Opc == AArch64::STRWui || Opc == AArch64::STURWi || 6490b57cec5SDimitry Andric isNarrowStore(Opc)) && 6500b57cec5SDimitry Andric getLdStRegOp(MI).getReg() == AArch64::WZR; 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric static bool isPromotableLoadFromStore(MachineInstr &MI) { 6540b57cec5SDimitry Andric switch (MI.getOpcode()) { 6550b57cec5SDimitry Andric default: 6560b57cec5SDimitry Andric return false; 6570b57cec5SDimitry Andric // Scaled instructions. 6580b57cec5SDimitry Andric case AArch64::LDRBBui: 6590b57cec5SDimitry Andric case AArch64::LDRHHui: 6600b57cec5SDimitry Andric case AArch64::LDRWui: 6610b57cec5SDimitry Andric case AArch64::LDRXui: 6620b57cec5SDimitry Andric // Unscaled instructions. 6630b57cec5SDimitry Andric case AArch64::LDURBBi: 6640b57cec5SDimitry Andric case AArch64::LDURHHi: 6650b57cec5SDimitry Andric case AArch64::LDURWi: 6660b57cec5SDimitry Andric case AArch64::LDURXi: 6670b57cec5SDimitry Andric return true; 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric static bool isMergeableLdStUpdate(MachineInstr &MI) { 6720b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 6730b57cec5SDimitry Andric switch (Opc) { 6740b57cec5SDimitry Andric default: 6750b57cec5SDimitry Andric return false; 6760b57cec5SDimitry Andric // Scaled instructions. 
6770b57cec5SDimitry Andric case AArch64::STRSui: 6780b57cec5SDimitry Andric case AArch64::STRDui: 6790b57cec5SDimitry Andric case AArch64::STRQui: 6800b57cec5SDimitry Andric case AArch64::STRXui: 6810b57cec5SDimitry Andric case AArch64::STRWui: 6820b57cec5SDimitry Andric case AArch64::STRHHui: 6830b57cec5SDimitry Andric case AArch64::STRBBui: 6840b57cec5SDimitry Andric case AArch64::LDRSui: 6850b57cec5SDimitry Andric case AArch64::LDRDui: 6860b57cec5SDimitry Andric case AArch64::LDRQui: 6870b57cec5SDimitry Andric case AArch64::LDRXui: 6880b57cec5SDimitry Andric case AArch64::LDRWui: 6890b57cec5SDimitry Andric case AArch64::LDRHHui: 6900b57cec5SDimitry Andric case AArch64::LDRBBui: 69106c3fb27SDimitry Andric case AArch64::STGi: 69206c3fb27SDimitry Andric case AArch64::STZGi: 69306c3fb27SDimitry Andric case AArch64::ST2Gi: 69406c3fb27SDimitry Andric case AArch64::STZ2Gi: 6958bcb0991SDimitry Andric case AArch64::STGPi: 6960b57cec5SDimitry Andric // Unscaled instructions. 6970b57cec5SDimitry Andric case AArch64::STURSi: 6980b57cec5SDimitry Andric case AArch64::STURDi: 6990b57cec5SDimitry Andric case AArch64::STURQi: 7000b57cec5SDimitry Andric case AArch64::STURWi: 7010b57cec5SDimitry Andric case AArch64::STURXi: 7020b57cec5SDimitry Andric case AArch64::LDURSi: 7030b57cec5SDimitry Andric case AArch64::LDURDi: 7040b57cec5SDimitry Andric case AArch64::LDURQi: 7050b57cec5SDimitry Andric case AArch64::LDURWi: 7060b57cec5SDimitry Andric case AArch64::LDURXi: 7070b57cec5SDimitry Andric // Paired instructions. 
7080b57cec5SDimitry Andric case AArch64::LDPSi: 7090b57cec5SDimitry Andric case AArch64::LDPSWi: 7100b57cec5SDimitry Andric case AArch64::LDPDi: 7110b57cec5SDimitry Andric case AArch64::LDPQi: 7120b57cec5SDimitry Andric case AArch64::LDPWi: 7130b57cec5SDimitry Andric case AArch64::LDPXi: 7140b57cec5SDimitry Andric case AArch64::STPSi: 7150b57cec5SDimitry Andric case AArch64::STPDi: 7160b57cec5SDimitry Andric case AArch64::STPQi: 7170b57cec5SDimitry Andric case AArch64::STPWi: 7180b57cec5SDimitry Andric case AArch64::STPXi: 7190b57cec5SDimitry Andric // Make sure this is a reg+imm (as opposed to an address reloc). 72081ad6265SDimitry Andric if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) 7210b57cec5SDimitry Andric return false; 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric return true; 7240b57cec5SDimitry Andric } 7250b57cec5SDimitry Andric } 7260b57cec5SDimitry Andric 7275f757f3fSDimitry Andric static bool isRewritableImplicitDef(unsigned Opc) { 7285f757f3fSDimitry Andric switch (Opc) { 7295f757f3fSDimitry Andric default: 7305f757f3fSDimitry Andric return false; 7315f757f3fSDimitry Andric case AArch64::ORRWrs: 7325f757f3fSDimitry Andric case AArch64::ADDWri: 7335f757f3fSDimitry Andric return true; 7345f757f3fSDimitry Andric } 7355f757f3fSDimitry Andric } 7365f757f3fSDimitry Andric 7370b57cec5SDimitry Andric MachineBasicBlock::iterator 7380b57cec5SDimitry Andric AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, 7390b57cec5SDimitry Andric MachineBasicBlock::iterator MergeMI, 7400b57cec5SDimitry Andric const LdStPairFlags &Flags) { 7410b57cec5SDimitry Andric assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && 7420b57cec5SDimitry Andric "Expected promotable zero stores."); 7430b57cec5SDimitry Andric 7445ffd83dbSDimitry Andric MachineBasicBlock::iterator E = I->getParent()->end(); 7455ffd83dbSDimitry Andric MachineBasicBlock::iterator NextI = next_nodbg(I, E); 7460b57cec5SDimitry Andric // If NextI is the 
second of the two instructions to be merged, we need 7470b57cec5SDimitry Andric // to skip one further. Either way we merge will invalidate the iterator, 7480b57cec5SDimitry Andric // and we don't need to scan the new instruction, as it's a pairwise 7490b57cec5SDimitry Andric // instruction, which we're not considering for further action anyway. 7500b57cec5SDimitry Andric if (NextI == MergeMI) 7515ffd83dbSDimitry Andric NextI = next_nodbg(NextI, E); 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric unsigned Opc = I->getOpcode(); 75406c3fb27SDimitry Andric unsigned MergeMIOpc = MergeMI->getOpcode(); 755fe6060f1SDimitry Andric bool IsScaled = !TII->hasUnscaledLdStOffset(Opc); 75606c3fb27SDimitry Andric bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc); 75706c3fb27SDimitry Andric int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1; 75806c3fb27SDimitry Andric int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1; 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric bool MergeForward = Flags.getMergeForward(); 7610b57cec5SDimitry Andric // Insert our new paired instruction after whichever of the paired 7620b57cec5SDimitry Andric // instructions MergeForward indicates. 7630b57cec5SDimitry Andric MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I; 7640b57cec5SDimitry Andric // Also based on MergeForward is from where we copy the base register operand 7650b57cec5SDimitry Andric // so we get the flags compatible with the input code. 7660b57cec5SDimitry Andric const MachineOperand &BaseRegOp = 76781ad6265SDimitry Andric MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI) 76881ad6265SDimitry Andric : AArch64InstrInfo::getLdStBaseOp(*I); 7690b57cec5SDimitry Andric 7700b57cec5SDimitry Andric // Which register is Rt and which is Rt2 depends on the offset order. 
77106c3fb27SDimitry Andric int64_t IOffsetInBytes = 77206c3fb27SDimitry Andric AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride; 77306c3fb27SDimitry Andric int64_t MIOffsetInBytes = 77406c3fb27SDimitry Andric AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() * 77506c3fb27SDimitry Andric MergeMIOffsetStride; 77606c3fb27SDimitry Andric // Select final offset based on the offset order. 77706c3fb27SDimitry Andric int64_t OffsetImm; 77806c3fb27SDimitry Andric if (IOffsetInBytes > MIOffsetInBytes) 77906c3fb27SDimitry Andric OffsetImm = MIOffsetInBytes; 7800b57cec5SDimitry Andric else 78106c3fb27SDimitry Andric OffsetImm = IOffsetInBytes; 7820b57cec5SDimitry Andric 78306c3fb27SDimitry Andric int NewOpcode = getMatchingWideOpcode(Opc); 78406c3fb27SDimitry Andric bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode); 78506c3fb27SDimitry Andric 78606c3fb27SDimitry Andric // Adjust final offset if the result opcode is a scaled store. 78706c3fb27SDimitry Andric if (FinalIsScaled) { 78806c3fb27SDimitry Andric int NewOffsetStride = FinalIsScaled ? TII->getMemScale(NewOpcode) : 1; 78906c3fb27SDimitry Andric assert(((OffsetImm % NewOffsetStride) == 0) && 79006c3fb27SDimitry Andric "Offset should be a multiple of the store memory scale"); 79106c3fb27SDimitry Andric OffsetImm = OffsetImm / NewOffsetStride; 7920b57cec5SDimitry Andric } 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric // Construct the new instruction. 7950b57cec5SDimitry Andric DebugLoc DL = I->getDebugLoc(); 7960b57cec5SDimitry Andric MachineBasicBlock *MBB = I->getParent(); 7970b57cec5SDimitry Andric MachineInstrBuilder MIB; 7980b57cec5SDimitry Andric MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc))) 7990b57cec5SDimitry Andric .addReg(isNarrowStore(Opc) ? 
AArch64::WZR : AArch64::XZR) 8000b57cec5SDimitry Andric .add(BaseRegOp) 8010b57cec5SDimitry Andric .addImm(OffsetImm) 8020b57cec5SDimitry Andric .cloneMergedMemRefs({&*I, &*MergeMI}) 8030b57cec5SDimitry Andric .setMIFlags(I->mergeFlagsWith(*MergeMI)); 8040b57cec5SDimitry Andric (void)MIB; 8050b57cec5SDimitry Andric 8060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); 8070b57cec5SDimitry Andric LLVM_DEBUG(I->print(dbgs())); 8080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " "); 8090b57cec5SDimitry Andric LLVM_DEBUG(MergeMI->print(dbgs())); 8100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with instruction:\n "); 8110b57cec5SDimitry Andric LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); 8120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 8130b57cec5SDimitry Andric 8140b57cec5SDimitry Andric // Erase the old instructions. 8150b57cec5SDimitry Andric I->eraseFromParent(); 8160b57cec5SDimitry Andric MergeMI->eraseFromParent(); 8170b57cec5SDimitry Andric return NextI; 8180b57cec5SDimitry Andric } 8190b57cec5SDimitry Andric 820480093f4SDimitry Andric // Apply Fn to all instructions between MI and the beginning of the block, until 821480093f4SDimitry Andric // a def for DefReg is reached. Returns true, iff Fn returns true for all 822480093f4SDimitry Andric // visited instructions. Stop after visiting Limit iterations. 
823480093f4SDimitry Andric static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, 824480093f4SDimitry Andric const TargetRegisterInfo *TRI, unsigned Limit, 825480093f4SDimitry Andric std::function<bool(MachineInstr &, bool)> &Fn) { 826480093f4SDimitry Andric auto MBB = MI.getParent(); 8275ffd83dbSDimitry Andric for (MachineInstr &I : 8285ffd83dbSDimitry Andric instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) { 829480093f4SDimitry Andric if (!Limit) 830480093f4SDimitry Andric return false; 831480093f4SDimitry Andric --Limit; 832480093f4SDimitry Andric 8335ffd83dbSDimitry Andric bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) { 834480093f4SDimitry Andric return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() && 835480093f4SDimitry Andric TRI->regsOverlap(MOP.getReg(), DefReg); 836480093f4SDimitry Andric }); 8375ffd83dbSDimitry Andric if (!Fn(I, isDef)) 838480093f4SDimitry Andric return false; 839480093f4SDimitry Andric if (isDef) 840480093f4SDimitry Andric break; 841480093f4SDimitry Andric } 842480093f4SDimitry Andric return true; 843480093f4SDimitry Andric } 844480093f4SDimitry Andric 845480093f4SDimitry Andric static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, 846480093f4SDimitry Andric const TargetRegisterInfo *TRI) { 847480093f4SDimitry Andric 848480093f4SDimitry Andric for (const MachineOperand &MOP : phys_regs_and_masks(MI)) 849480093f4SDimitry Andric if (MOP.isReg() && MOP.isKill()) 850480093f4SDimitry Andric Units.removeReg(MOP.getReg()); 851480093f4SDimitry Andric 852480093f4SDimitry Andric for (const MachineOperand &MOP : phys_regs_and_masks(MI)) 853480093f4SDimitry Andric if (MOP.isReg() && !MOP.isKill()) 854480093f4SDimitry Andric Units.addReg(MOP.getReg()); 855480093f4SDimitry Andric } 856480093f4SDimitry Andric 8570b57cec5SDimitry Andric MachineBasicBlock::iterator 8580b57cec5SDimitry Andric AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, 
8590b57cec5SDimitry Andric MachineBasicBlock::iterator Paired, 8600b57cec5SDimitry Andric const LdStPairFlags &Flags) { 8615ffd83dbSDimitry Andric MachineBasicBlock::iterator E = I->getParent()->end(); 8625ffd83dbSDimitry Andric MachineBasicBlock::iterator NextI = next_nodbg(I, E); 8630b57cec5SDimitry Andric // If NextI is the second of the two instructions to be merged, we need 8640b57cec5SDimitry Andric // to skip one further. Either way we merge will invalidate the iterator, 8650b57cec5SDimitry Andric // and we don't need to scan the new instruction, as it's a pairwise 8660b57cec5SDimitry Andric // instruction, which we're not considering for further action anyway. 8670b57cec5SDimitry Andric if (NextI == Paired) 8685ffd83dbSDimitry Andric NextI = next_nodbg(NextI, E); 8690b57cec5SDimitry Andric 8700b57cec5SDimitry Andric int SExtIdx = Flags.getSExtIdx(); 8710b57cec5SDimitry Andric unsigned Opc = 8720b57cec5SDimitry Andric SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); 873fe6060f1SDimitry Andric bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc); 874480093f4SDimitry Andric int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1; 8750b57cec5SDimitry Andric 8760b57cec5SDimitry Andric bool MergeForward = Flags.getMergeForward(); 877480093f4SDimitry Andric 878bdd1243dSDimitry Andric std::optional<MCPhysReg> RenameReg = Flags.getRenameReg(); 8795f757f3fSDimitry Andric if (RenameReg) { 880480093f4SDimitry Andric MCRegister RegToRename = getLdStRegOp(*I).getReg(); 881480093f4SDimitry Andric DefinedInBB.addReg(*RenameReg); 882480093f4SDimitry Andric 883480093f4SDimitry Andric // Return the sub/super register for RenameReg, matching the size of 884480093f4SDimitry Andric // OriginalReg. 
8855f757f3fSDimitry Andric auto GetMatchingSubReg = 8865f757f3fSDimitry Andric [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg { 8875f757f3fSDimitry Andric for (MCPhysReg SubOrSuper : 8885f757f3fSDimitry Andric TRI->sub_and_superregs_inclusive(*RenameReg)) { 8895f757f3fSDimitry Andric if (C->contains(SubOrSuper)) 890480093f4SDimitry Andric return SubOrSuper; 8915f757f3fSDimitry Andric } 892480093f4SDimitry Andric llvm_unreachable("Should have found matching sub or super register!"); 893480093f4SDimitry Andric }; 894480093f4SDimitry Andric 895480093f4SDimitry Andric std::function<bool(MachineInstr &, bool)> UpdateMIs = 8965f757f3fSDimitry Andric [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI, 8975f757f3fSDimitry Andric bool IsDef) { 898480093f4SDimitry Andric if (IsDef) { 899480093f4SDimitry Andric bool SeenDef = false; 9005f757f3fSDimitry Andric for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { 9015f757f3fSDimitry Andric MachineOperand &MOP = MI.getOperand(OpIdx); 902480093f4SDimitry Andric // Rename the first explicit definition and all implicit 903480093f4SDimitry Andric // definitions matching RegToRename. 
904480093f4SDimitry Andric if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && 9055f757f3fSDimitry Andric (!MergeForward || !SeenDef || 9065f757f3fSDimitry Andric (MOP.isDef() && MOP.isImplicit())) && 907480093f4SDimitry Andric TRI->regsOverlap(MOP.getReg(), RegToRename)) { 908480093f4SDimitry Andric assert((MOP.isImplicit() || 909480093f4SDimitry Andric (MOP.isRenamable() && !MOP.isEarlyClobber())) && 910480093f4SDimitry Andric "Need renamable operands"); 9115f757f3fSDimitry Andric Register MatchingReg; 9125f757f3fSDimitry Andric if (const TargetRegisterClass *RC = 9135f757f3fSDimitry Andric MI.getRegClassConstraint(OpIdx, TII, TRI)) 9145f757f3fSDimitry Andric MatchingReg = GetMatchingSubReg(RC); 9155f757f3fSDimitry Andric else { 9165f757f3fSDimitry Andric if (!isRewritableImplicitDef(MI.getOpcode())) 9175f757f3fSDimitry Andric continue; 9185f757f3fSDimitry Andric MatchingReg = GetMatchingSubReg( 9195f757f3fSDimitry Andric TRI->getMinimalPhysRegClass(MOP.getReg())); 9205f757f3fSDimitry Andric } 9215f757f3fSDimitry Andric MOP.setReg(MatchingReg); 922480093f4SDimitry Andric SeenDef = true; 923480093f4SDimitry Andric } 924480093f4SDimitry Andric } 925480093f4SDimitry Andric } else { 9265f757f3fSDimitry Andric for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { 9275f757f3fSDimitry Andric MachineOperand &MOP = MI.getOperand(OpIdx); 928480093f4SDimitry Andric if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && 929480093f4SDimitry Andric TRI->regsOverlap(MOP.getReg(), RegToRename)) { 930480093f4SDimitry Andric assert((MOP.isImplicit() || 931480093f4SDimitry Andric (MOP.isRenamable() && !MOP.isEarlyClobber())) && 932480093f4SDimitry Andric "Need renamable operands"); 9335f757f3fSDimitry Andric Register MatchingReg; 9345f757f3fSDimitry Andric if (const TargetRegisterClass *RC = 9355f757f3fSDimitry Andric MI.getRegClassConstraint(OpIdx, TII, TRI)) 9365f757f3fSDimitry Andric MatchingReg = GetMatchingSubReg(RC); 9375f757f3fSDimitry Andric else 
9385f757f3fSDimitry Andric MatchingReg = GetMatchingSubReg( 9395f757f3fSDimitry Andric TRI->getMinimalPhysRegClass(MOP.getReg())); 9405f757f3fSDimitry Andric assert(MatchingReg != AArch64::NoRegister && 9415f757f3fSDimitry Andric "Cannot find matching regs for renaming"); 9425f757f3fSDimitry Andric MOP.setReg(MatchingReg); 943480093f4SDimitry Andric } 944480093f4SDimitry Andric } 945480093f4SDimitry Andric } 9465f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Renamed " << MI); 947480093f4SDimitry Andric return true; 948480093f4SDimitry Andric }; 9495f757f3fSDimitry Andric forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI, 9505f757f3fSDimitry Andric UINT32_MAX, UpdateMIs); 951480093f4SDimitry Andric 952480093f4SDimitry Andric #if !defined(NDEBUG) 9535f757f3fSDimitry Andric // For forward merging store: 9545f757f3fSDimitry Andric // Make sure the register used for renaming is not used between the 9555f757f3fSDimitry Andric // paired instructions. That would trash the content before the new 9565f757f3fSDimitry Andric // paired instruction. 9575f757f3fSDimitry Andric MCPhysReg RegToCheck = *RenameReg; 9585f757f3fSDimitry Andric // For backward merging load: 9595f757f3fSDimitry Andric // Make sure the register being renamed is not used between the 9605f757f3fSDimitry Andric // paired instructions. That would trash the content after the new 9615f757f3fSDimitry Andric // paired instruction. 9625f757f3fSDimitry Andric if (!MergeForward) 9635f757f3fSDimitry Andric RegToCheck = RegToRename; 964480093f4SDimitry Andric for (auto &MI : 965480093f4SDimitry Andric iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>( 9665f757f3fSDimitry Andric MergeForward ? std::next(I) : I, 9675f757f3fSDimitry Andric MergeForward ? 
std::next(Paired) : Paired)) 968480093f4SDimitry Andric assert(all_of(MI.operands(), 9695f757f3fSDimitry Andric [this, RegToCheck](const MachineOperand &MOP) { 970480093f4SDimitry Andric return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() || 97181ad6265SDimitry Andric MOP.isUndef() || 9725f757f3fSDimitry Andric !TRI->regsOverlap(MOP.getReg(), RegToCheck); 973480093f4SDimitry Andric }) && 974480093f4SDimitry Andric "Rename register used between paired instruction, trashing the " 975480093f4SDimitry Andric "content"); 976480093f4SDimitry Andric #endif 977480093f4SDimitry Andric } 978480093f4SDimitry Andric 9790b57cec5SDimitry Andric // Insert our new paired instruction after whichever of the paired 9800b57cec5SDimitry Andric // instructions MergeForward indicates. 9810b57cec5SDimitry Andric MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; 9820b57cec5SDimitry Andric // Also based on MergeForward is from where we copy the base register operand 9830b57cec5SDimitry Andric // so we get the flags compatible with the input code. 9840b57cec5SDimitry Andric const MachineOperand &BaseRegOp = 98581ad6265SDimitry Andric MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired) 98681ad6265SDimitry Andric : AArch64InstrInfo::getLdStBaseOp(*I); 9870b57cec5SDimitry Andric 98881ad6265SDimitry Andric int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm(); 98981ad6265SDimitry Andric int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm(); 990fe6060f1SDimitry Andric bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode()); 9910b57cec5SDimitry Andric if (IsUnscaled != PairedIsUnscaled) { 9920b57cec5SDimitry Andric // We're trying to pair instructions that differ in how they are scaled. If 9930b57cec5SDimitry Andric // I is scaled then scale the offset of Paired accordingly. Otherwise, do 9940b57cec5SDimitry Andric // the opposite (i.e., make Paired's offset unscaled). 
995480093f4SDimitry Andric int MemSize = TII->getMemScale(*Paired); 9960b57cec5SDimitry Andric if (PairedIsUnscaled) { 9970b57cec5SDimitry Andric // If the unscaled offset isn't a multiple of the MemSize, we can't 9980b57cec5SDimitry Andric // pair the operations together. 999480093f4SDimitry Andric assert(!(PairedOffset % TII->getMemScale(*Paired)) && 10000b57cec5SDimitry Andric "Offset should be a multiple of the stride!"); 10010b57cec5SDimitry Andric PairedOffset /= MemSize; 10020b57cec5SDimitry Andric } else { 10030b57cec5SDimitry Andric PairedOffset *= MemSize; 10040b57cec5SDimitry Andric } 10050b57cec5SDimitry Andric } 10060b57cec5SDimitry Andric 10070b57cec5SDimitry Andric // Which register is Rt and which is Rt2 depends on the offset order. 1008fe6060f1SDimitry Andric // However, for pre load/stores the Rt should be the one of the pre 1009fe6060f1SDimitry Andric // load/store. 10100b57cec5SDimitry Andric MachineInstr *RtMI, *Rt2MI; 1011fe6060f1SDimitry Andric if (Offset == PairedOffset + OffsetStride && 1012fe6060f1SDimitry Andric !AArch64InstrInfo::isPreLdSt(*I)) { 10130b57cec5SDimitry Andric RtMI = &*Paired; 10140b57cec5SDimitry Andric Rt2MI = &*I; 10150b57cec5SDimitry Andric // Here we swapped the assumption made for SExtIdx. 10160b57cec5SDimitry Andric // I.e., we turn ldp I, Paired into ldp Paired, I. 10170b57cec5SDimitry Andric // Update the index accordingly. 10180b57cec5SDimitry Andric if (SExtIdx != -1) 10190b57cec5SDimitry Andric SExtIdx = (SExtIdx + 1) % 2; 10200b57cec5SDimitry Andric } else { 10210b57cec5SDimitry Andric RtMI = &*I; 10220b57cec5SDimitry Andric Rt2MI = &*Paired; 10230b57cec5SDimitry Andric } 102481ad6265SDimitry Andric int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm(); 10250b57cec5SDimitry Andric // Scale the immediate offset, if necessary. 
1026fe6060f1SDimitry Andric if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) { 1027480093f4SDimitry Andric assert(!(OffsetImm % TII->getMemScale(*RtMI)) && 10280b57cec5SDimitry Andric "Unscaled offset cannot be scaled."); 1029480093f4SDimitry Andric OffsetImm /= TII->getMemScale(*RtMI); 10300b57cec5SDimitry Andric } 10310b57cec5SDimitry Andric 10320b57cec5SDimitry Andric // Construct the new instruction. 10330b57cec5SDimitry Andric MachineInstrBuilder MIB; 10340b57cec5SDimitry Andric DebugLoc DL = I->getDebugLoc(); 10350b57cec5SDimitry Andric MachineBasicBlock *MBB = I->getParent(); 10360b57cec5SDimitry Andric MachineOperand RegOp0 = getLdStRegOp(*RtMI); 10370b57cec5SDimitry Andric MachineOperand RegOp1 = getLdStRegOp(*Rt2MI); 10385f757f3fSDimitry Andric MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1; 10390b57cec5SDimitry Andric // Kill flags may become invalid when moving stores for pairing. 10400b57cec5SDimitry Andric if (RegOp0.isUse()) { 10410b57cec5SDimitry Andric if (!MergeForward) { 10420b57cec5SDimitry Andric // Clear kill flags on store if moving upwards. Example: 10435f757f3fSDimitry Andric // STRWui kill %w0, ... 10440b57cec5SDimitry Andric // USE %w1 10450b57cec5SDimitry Andric // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards 10465f757f3fSDimitry Andric // We are about to move the store of w1, so its kill flag may become 10475f757f3fSDimitry Andric // invalid; not the case for w0. 10485f757f3fSDimitry Andric // Since w1 is used between the stores, the kill flag on w1 is cleared 10495f757f3fSDimitry Andric // after merging. 10505f757f3fSDimitry Andric // STPWi kill %w0, %w1, ... 
10515f757f3fSDimitry Andric // USE %w1 10525f757f3fSDimitry Andric for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It) 10535f757f3fSDimitry Andric if (It->readsRegister(PairedRegOp.getReg(), TRI)) 10545f757f3fSDimitry Andric PairedRegOp.setIsKill(false); 10550b57cec5SDimitry Andric } else { 10560b57cec5SDimitry Andric // Clear kill flags of the first stores register. Example: 10570b57cec5SDimitry Andric // STRWui %w1, ... 10580b57cec5SDimitry Andric // USE kill %w1 ; need to clear kill flag when moving STRWui downwards 10590b57cec5SDimitry Andric // STRW %w0 10608bcb0991SDimitry Andric Register Reg = getLdStRegOp(*I).getReg(); 10610b57cec5SDimitry Andric for (MachineInstr &MI : make_range(std::next(I), Paired)) 10620b57cec5SDimitry Andric MI.clearRegisterKills(Reg, TRI); 10630b57cec5SDimitry Andric } 10640b57cec5SDimitry Andric } 1065fe6060f1SDimitry Andric 1066fe6060f1SDimitry Andric unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc); 1067fe6060f1SDimitry Andric MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode)); 1068fe6060f1SDimitry Andric 1069fe6060f1SDimitry Andric // Adds the pre-index operand for pre-indexed ld/st pairs. 1070fe6060f1SDimitry Andric if (AArch64InstrInfo::isPreLdSt(*RtMI)) 1071fe6060f1SDimitry Andric MIB.addReg(BaseRegOp.getReg(), RegState::Define); 1072fe6060f1SDimitry Andric 1073fe6060f1SDimitry Andric MIB.add(RegOp0) 10740b57cec5SDimitry Andric .add(RegOp1) 10750b57cec5SDimitry Andric .add(BaseRegOp) 10760b57cec5SDimitry Andric .addImm(OffsetImm) 10770b57cec5SDimitry Andric .cloneMergedMemRefs({&*I, &*Paired}) 10780b57cec5SDimitry Andric .setMIFlags(I->mergeFlagsWith(*Paired)); 10790b57cec5SDimitry Andric 10800b57cec5SDimitry Andric (void)MIB; 10810b57cec5SDimitry Andric 10820b57cec5SDimitry Andric LLVM_DEBUG( 10830b57cec5SDimitry Andric dbgs() << "Creating pair load/store. 
Replacing instructions:\n "); 10840b57cec5SDimitry Andric LLVM_DEBUG(I->print(dbgs())); 10850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " "); 10860b57cec5SDimitry Andric LLVM_DEBUG(Paired->print(dbgs())); 10870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with instruction:\n "); 10880b57cec5SDimitry Andric if (SExtIdx != -1) { 10890b57cec5SDimitry Andric // Generate the sign extension for the proper result of the ldp. 10900b57cec5SDimitry Andric // I.e., with X1, that would be: 10910b57cec5SDimitry Andric // %w1 = KILL %w1, implicit-def %x1 10920b57cec5SDimitry Andric // %x1 = SBFMXri killed %x1, 0, 31 10930b57cec5SDimitry Andric MachineOperand &DstMO = MIB->getOperand(SExtIdx); 10940b57cec5SDimitry Andric // Right now, DstMO has the extended register, since it comes from an 10950b57cec5SDimitry Andric // extended opcode. 10968bcb0991SDimitry Andric Register DstRegX = DstMO.getReg(); 10970b57cec5SDimitry Andric // Get the W variant of that register. 10988bcb0991SDimitry Andric Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); 10990b57cec5SDimitry Andric // Update the result of LDP to use the W instead of the X variant. 11000b57cec5SDimitry Andric DstMO.setReg(DstRegW); 11010b57cec5SDimitry Andric LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); 11020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 11030b57cec5SDimitry Andric // Make the machine verifier happy by providing a definition for 11040b57cec5SDimitry Andric // the X register. 11050b57cec5SDimitry Andric // Insert this definition right after the generated LDP, i.e., before 11060b57cec5SDimitry Andric // InsertionPoint. 
11070b57cec5SDimitry Andric MachineInstrBuilder MIBKill = 11080b57cec5SDimitry Andric BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW) 11090b57cec5SDimitry Andric .addReg(DstRegW) 11100b57cec5SDimitry Andric .addReg(DstRegX, RegState::Define); 11110b57cec5SDimitry Andric MIBKill->getOperand(2).setImplicit(); 11120b57cec5SDimitry Andric // Create the sign extension. 11130b57cec5SDimitry Andric MachineInstrBuilder MIBSXTW = 11140b57cec5SDimitry Andric BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX) 11150b57cec5SDimitry Andric .addReg(DstRegX) 11160b57cec5SDimitry Andric .addImm(0) 11170b57cec5SDimitry Andric .addImm(31); 11180b57cec5SDimitry Andric (void)MIBSXTW; 11190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Extend operand:\n "); 11200b57cec5SDimitry Andric LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); 11210b57cec5SDimitry Andric } else { 11220b57cec5SDimitry Andric LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); 11230b57cec5SDimitry Andric } 11240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 11250b57cec5SDimitry Andric 1126480093f4SDimitry Andric if (MergeForward) 1127480093f4SDimitry Andric for (const MachineOperand &MOP : phys_regs_and_masks(*I)) 1128480093f4SDimitry Andric if (MOP.isReg() && MOP.isKill()) 1129480093f4SDimitry Andric DefinedInBB.addReg(MOP.getReg()); 1130480093f4SDimitry Andric 11310b57cec5SDimitry Andric // Erase the old instructions. 
11320b57cec5SDimitry Andric I->eraseFromParent(); 11330b57cec5SDimitry Andric Paired->eraseFromParent(); 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric return NextI; 11360b57cec5SDimitry Andric } 11370b57cec5SDimitry Andric 11380b57cec5SDimitry Andric MachineBasicBlock::iterator 11390b57cec5SDimitry Andric AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, 11400b57cec5SDimitry Andric MachineBasicBlock::iterator StoreI) { 11415ffd83dbSDimitry Andric MachineBasicBlock::iterator NextI = 11425ffd83dbSDimitry Andric next_nodbg(LoadI, LoadI->getParent()->end()); 11430b57cec5SDimitry Andric 1144480093f4SDimitry Andric int LoadSize = TII->getMemScale(*LoadI); 1145480093f4SDimitry Andric int StoreSize = TII->getMemScale(*StoreI); 11468bcb0991SDimitry Andric Register LdRt = getLdStRegOp(*LoadI).getReg(); 11470b57cec5SDimitry Andric const MachineOperand &StMO = getLdStRegOp(*StoreI); 11488bcb0991SDimitry Andric Register StRt = getLdStRegOp(*StoreI).getReg(); 11490b57cec5SDimitry Andric bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric assert((IsStoreXReg || 11520b57cec5SDimitry Andric TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) && 11530b57cec5SDimitry Andric "Unexpected RegClass"); 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric MachineInstr *BitExtMI; 11560b57cec5SDimitry Andric if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) { 11570b57cec5SDimitry Andric // Remove the load, if the destination register of the loads is the same 11580b57cec5SDimitry Andric // register for stored value. 
11590b57cec5SDimitry Andric if (StRt == LdRt && LoadSize == 8) { 11600b57cec5SDimitry Andric for (MachineInstr &MI : make_range(StoreI->getIterator(), 11610b57cec5SDimitry Andric LoadI->getIterator())) { 11620b57cec5SDimitry Andric if (MI.killsRegister(StRt, TRI)) { 11630b57cec5SDimitry Andric MI.clearRegisterKills(StRt, TRI); 11640b57cec5SDimitry Andric break; 11650b57cec5SDimitry Andric } 11660b57cec5SDimitry Andric } 11670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Remove load instruction:\n "); 11680b57cec5SDimitry Andric LLVM_DEBUG(LoadI->print(dbgs())); 11690b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 11700b57cec5SDimitry Andric LoadI->eraseFromParent(); 11710b57cec5SDimitry Andric return NextI; 11720b57cec5SDimitry Andric } 11730b57cec5SDimitry Andric // Replace the load with a mov if the load and store are in the same size. 11740b57cec5SDimitry Andric BitExtMI = 11750b57cec5SDimitry Andric BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), 11760b57cec5SDimitry Andric TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) 11770b57cec5SDimitry Andric .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) 11780b57cec5SDimitry Andric .add(StMO) 11790b57cec5SDimitry Andric .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 11800b57cec5SDimitry Andric .setMIFlags(LoadI->getFlags()); 11810b57cec5SDimitry Andric } else { 11820b57cec5SDimitry Andric // FIXME: Currently we disable this transformation in big-endian targets as 11830b57cec5SDimitry Andric // performance and correctness are verified only in little-endian. 
11840b57cec5SDimitry Andric if (!Subtarget->isLittleEndian()) 11850b57cec5SDimitry Andric return NextI; 1186fe6060f1SDimitry Andric bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI); 1187fe6060f1SDimitry Andric assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) && 11880b57cec5SDimitry Andric "Unsupported ld/st match"); 11890b57cec5SDimitry Andric assert(LoadSize <= StoreSize && "Invalid load size"); 119081ad6265SDimitry Andric int UnscaledLdOffset = 119181ad6265SDimitry Andric IsUnscaled 119281ad6265SDimitry Andric ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() 119381ad6265SDimitry Andric : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize; 119481ad6265SDimitry Andric int UnscaledStOffset = 119581ad6265SDimitry Andric IsUnscaled 119681ad6265SDimitry Andric ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() 119781ad6265SDimitry Andric : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize; 11980b57cec5SDimitry Andric int Width = LoadSize * 8; 119904eeddc0SDimitry Andric Register DestReg = 12008bcb0991SDimitry Andric IsStoreXReg ? Register(TRI->getMatchingSuperReg( 12018bcb0991SDimitry Andric LdRt, AArch64::sub_32, &AArch64::GPR64RegClass)) 12020b57cec5SDimitry Andric : LdRt; 12030b57cec5SDimitry Andric 12040b57cec5SDimitry Andric assert((UnscaledLdOffset >= UnscaledStOffset && 12050b57cec5SDimitry Andric (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && 12060b57cec5SDimitry Andric "Invalid offset"); 12070b57cec5SDimitry Andric 12080b57cec5SDimitry Andric int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); 12090b57cec5SDimitry Andric int Imms = Immr + Width - 1; 12100b57cec5SDimitry Andric if (UnscaledLdOffset == UnscaledStOffset) { 12110b57cec5SDimitry Andric uint32_t AndMaskEncoded = ((IsStoreXReg ? 
1 : 0) << 12) // N 12120b57cec5SDimitry Andric | ((Immr) << 6) // immr 12130b57cec5SDimitry Andric | ((Imms) << 0) // imms 12140b57cec5SDimitry Andric ; 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric BitExtMI = 12170b57cec5SDimitry Andric BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), 12180b57cec5SDimitry Andric TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), 12190b57cec5SDimitry Andric DestReg) 12200b57cec5SDimitry Andric .add(StMO) 12210b57cec5SDimitry Andric .addImm(AndMaskEncoded) 12220b57cec5SDimitry Andric .setMIFlags(LoadI->getFlags()); 12230b57cec5SDimitry Andric } else { 12240b57cec5SDimitry Andric BitExtMI = 12250b57cec5SDimitry Andric BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), 12260b57cec5SDimitry Andric TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri), 12270b57cec5SDimitry Andric DestReg) 12280b57cec5SDimitry Andric .add(StMO) 12290b57cec5SDimitry Andric .addImm(Immr) 12300b57cec5SDimitry Andric .addImm(Imms) 12310b57cec5SDimitry Andric .setMIFlags(LoadI->getFlags()); 12320b57cec5SDimitry Andric } 12330b57cec5SDimitry Andric } 12340b57cec5SDimitry Andric 12350b57cec5SDimitry Andric // Clear kill flags between store and load. 
12360b57cec5SDimitry Andric for (MachineInstr &MI : make_range(StoreI->getIterator(), 12370b57cec5SDimitry Andric BitExtMI->getIterator())) 12380b57cec5SDimitry Andric if (MI.killsRegister(StRt, TRI)) { 12390b57cec5SDimitry Andric MI.clearRegisterKills(StRt, TRI); 12400b57cec5SDimitry Andric break; 12410b57cec5SDimitry Andric } 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n "); 12440b57cec5SDimitry Andric LLVM_DEBUG(StoreI->print(dbgs())); 12450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " "); 12460b57cec5SDimitry Andric LLVM_DEBUG(LoadI->print(dbgs())); 12470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with instructions:\n "); 12480b57cec5SDimitry Andric LLVM_DEBUG(StoreI->print(dbgs())); 12490b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " "); 12500b57cec5SDimitry Andric LLVM_DEBUG((BitExtMI)->print(dbgs())); 12510b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 12520b57cec5SDimitry Andric 12530b57cec5SDimitry Andric // Erase the old instructions. 12540b57cec5SDimitry Andric LoadI->eraseFromParent(); 12550b57cec5SDimitry Andric return NextI; 12560b57cec5SDimitry Andric } 12570b57cec5SDimitry Andric 12580b57cec5SDimitry Andric static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { 12590b57cec5SDimitry Andric // Convert the byte-offset used by unscaled into an "element" offset used 12600b57cec5SDimitry Andric // by the scaled pair load/store instructions. 12610b57cec5SDimitry Andric if (IsUnscaled) { 12620b57cec5SDimitry Andric // If the byte-offset isn't a multiple of the stride, there's no point 12630b57cec5SDimitry Andric // trying to match it. 
12640b57cec5SDimitry Andric if (Offset % OffsetStride) 12650b57cec5SDimitry Andric return false; 12660b57cec5SDimitry Andric Offset /= OffsetStride; 12670b57cec5SDimitry Andric } 12680b57cec5SDimitry Andric return Offset <= 63 && Offset >= -64; 12690b57cec5SDimitry Andric } 12700b57cec5SDimitry Andric 12710b57cec5SDimitry Andric // Do alignment, specialized to power of 2 and for signed ints, 12720b57cec5SDimitry Andric // avoiding having to do a C-style cast from uint_64t to int when 12730b57cec5SDimitry Andric // using alignTo from include/llvm/Support/MathExtras.h. 12740b57cec5SDimitry Andric // FIXME: Move this function to include/MathExtras.h? 12750b57cec5SDimitry Andric static int alignTo(int Num, int PowOf2) { 12760b57cec5SDimitry Andric return (Num + PowOf2 - 1) & ~(PowOf2 - 1); 12770b57cec5SDimitry Andric } 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric static bool mayAlias(MachineInstr &MIa, 12800b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &MemInsns, 12810b57cec5SDimitry Andric AliasAnalysis *AA) { 1282297eecfbSDimitry Andric for (MachineInstr *MIb : MemInsns) { 1283297eecfbSDimitry Andric if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) { 1284297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump()); 12850b57cec5SDimitry Andric return true; 1286297eecfbSDimitry Andric } 1287297eecfbSDimitry Andric } 12880b57cec5SDimitry Andric 1289297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "No aliases found\n"); 12900b57cec5SDimitry Andric return false; 12910b57cec5SDimitry Andric } 12920b57cec5SDimitry Andric 12930b57cec5SDimitry Andric bool AArch64LoadStoreOpt::findMatchingStore( 12940b57cec5SDimitry Andric MachineBasicBlock::iterator I, unsigned Limit, 12950b57cec5SDimitry Andric MachineBasicBlock::iterator &StoreI) { 12960b57cec5SDimitry Andric MachineBasicBlock::iterator B = I->getParent()->begin(); 12970b57cec5SDimitry Andric MachineBasicBlock::iterator MBBI = I; 12980b57cec5SDimitry Andric MachineInstr &LoadMI = *I; 
129981ad6265SDimitry Andric Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg(); 13000b57cec5SDimitry Andric 13010b57cec5SDimitry Andric // If the load is the first instruction in the block, there's obviously 13020b57cec5SDimitry Andric // not any matching store. 13030b57cec5SDimitry Andric if (MBBI == B) 13040b57cec5SDimitry Andric return false; 13050b57cec5SDimitry Andric 13060b57cec5SDimitry Andric // Track which register units have been modified and used between the first 13070b57cec5SDimitry Andric // insn and the second insn. 13080b57cec5SDimitry Andric ModifiedRegUnits.clear(); 13090b57cec5SDimitry Andric UsedRegUnits.clear(); 13100b57cec5SDimitry Andric 13110b57cec5SDimitry Andric unsigned Count = 0; 13120b57cec5SDimitry Andric do { 13135ffd83dbSDimitry Andric MBBI = prev_nodbg(MBBI, B); 13140b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 13150b57cec5SDimitry Andric 13160b57cec5SDimitry Andric // Don't count transient instructions towards the search limit since there 13170b57cec5SDimitry Andric // may be different numbers of them if e.g. debug information is present. 13180b57cec5SDimitry Andric if (!MI.isTransient()) 13190b57cec5SDimitry Andric ++Count; 13200b57cec5SDimitry Andric 13210b57cec5SDimitry Andric // If the load instruction reads directly from the address to which the 13220b57cec5SDimitry Andric // store instruction writes and the stored value is not modified, we can 13230b57cec5SDimitry Andric // promote the load. Since we do not handle stores with pre-/post-index, 13240b57cec5SDimitry Andric // it's unnecessary to check if BaseReg is modified by the store itself. 1325e8d8bef9SDimitry Andric // Also we can't handle stores without an immediate offset operand, 1326e8d8bef9SDimitry Andric // while the operand might be the address for a global variable. 
13270b57cec5SDimitry Andric if (MI.mayStore() && isMatchingStore(LoadMI, MI) && 132881ad6265SDimitry Andric BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() && 132981ad6265SDimitry Andric AArch64InstrInfo::getLdStOffsetOp(MI).isImm() && 13300b57cec5SDimitry Andric isLdOffsetInRangeOfSt(LoadMI, MI, TII) && 13310b57cec5SDimitry Andric ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) { 13320b57cec5SDimitry Andric StoreI = MBBI; 13330b57cec5SDimitry Andric return true; 13340b57cec5SDimitry Andric } 13350b57cec5SDimitry Andric 13360b57cec5SDimitry Andric if (MI.isCall()) 13370b57cec5SDimitry Andric return false; 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric // Update modified / uses register units. 13400b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); 13410b57cec5SDimitry Andric 13420b57cec5SDimitry Andric // Otherwise, if the base register is modified, we have no match, so 13430b57cec5SDimitry Andric // return early. 13440b57cec5SDimitry Andric if (!ModifiedRegUnits.available(BaseReg)) 13450b57cec5SDimitry Andric return false; 13460b57cec5SDimitry Andric 13470b57cec5SDimitry Andric // If we encounter a store aliased with the load, return early. 13485ffd83dbSDimitry Andric if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false)) 13490b57cec5SDimitry Andric return false; 13500b57cec5SDimitry Andric } while (MBBI != B && Count < Limit); 13510b57cec5SDimitry Andric return false; 13520b57cec5SDimitry Andric } 13530b57cec5SDimitry Andric 1354bdd1243dSDimitry Andric static bool needsWinCFI(const MachineFunction *MF) { 1355bdd1243dSDimitry Andric return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && 1356bdd1243dSDimitry Andric MF->getFunction().needsUnwindTableEntry(); 1357bdd1243dSDimitry Andric } 1358bdd1243dSDimitry Andric 13590b57cec5SDimitry Andric // Returns true if FirstMI and MI are candidates for merging or pairing. 13600b57cec5SDimitry Andric // Otherwise, returns false. 
13610b57cec5SDimitry Andric static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, 13620b57cec5SDimitry Andric LdStPairFlags &Flags, 13630b57cec5SDimitry Andric const AArch64InstrInfo *TII) { 13640b57cec5SDimitry Andric // If this is volatile or if pairing is suppressed, not a candidate. 13650b57cec5SDimitry Andric if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) 13660b57cec5SDimitry Andric return false; 13670b57cec5SDimitry Andric 13680b57cec5SDimitry Andric // We should have already checked FirstMI for pair suppression and volatility. 13690b57cec5SDimitry Andric assert(!FirstMI.hasOrderedMemoryRef() && 13700b57cec5SDimitry Andric !TII->isLdStPairSuppressed(FirstMI) && 13710b57cec5SDimitry Andric "FirstMI shouldn't get here if either of these checks are true."); 13720b57cec5SDimitry Andric 1373bdd1243dSDimitry Andric if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) || 1374bdd1243dSDimitry Andric MI.getFlag(MachineInstr::FrameDestroy))) 1375bdd1243dSDimitry Andric return false; 1376bdd1243dSDimitry Andric 13770b57cec5SDimitry Andric unsigned OpcA = FirstMI.getOpcode(); 13780b57cec5SDimitry Andric unsigned OpcB = MI.getOpcode(); 13790b57cec5SDimitry Andric 1380fe6060f1SDimitry Andric // Opcodes match: If the opcodes are pre ld/st there is nothing more to check. 13810b57cec5SDimitry Andric if (OpcA == OpcB) 1382fe6060f1SDimitry Andric return !AArch64InstrInfo::isPreLdSt(FirstMI); 13830b57cec5SDimitry Andric 13845f757f3fSDimitry Andric // Two pre ld/st of different opcodes cannot be merged either 13855f757f3fSDimitry Andric if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI)) 13865f757f3fSDimitry Andric return false; 13875f757f3fSDimitry Andric 13880b57cec5SDimitry Andric // Try to match a sign-extended load/store with a zero-extended load/store. 
13890b57cec5SDimitry Andric bool IsValidLdStrOpc, PairIsValidLdStrOpc; 13900b57cec5SDimitry Andric unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc); 13910b57cec5SDimitry Andric assert(IsValidLdStrOpc && 13920b57cec5SDimitry Andric "Given Opc should be a Load or Store with an immediate"); 13930b57cec5SDimitry Andric // OpcA will be the first instruction in the pair. 13940b57cec5SDimitry Andric if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) { 13950b57cec5SDimitry Andric Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0); 13960b57cec5SDimitry Andric return true; 13970b57cec5SDimitry Andric } 13980b57cec5SDimitry Andric 13990b57cec5SDimitry Andric // If the second instruction isn't even a mergable/pairable load/store, bail 14000b57cec5SDimitry Andric // out. 14010b57cec5SDimitry Andric if (!PairIsValidLdStrOpc) 14020b57cec5SDimitry Andric return false; 14030b57cec5SDimitry Andric 14040b57cec5SDimitry Andric // FIXME: We don't support merging narrow stores with mixed scaled/unscaled 14050b57cec5SDimitry Andric // offsets. 14060b57cec5SDimitry Andric if (isNarrowStore(OpcA) || isNarrowStore(OpcB)) 14070b57cec5SDimitry Andric return false; 14080b57cec5SDimitry Andric 1409fe6060f1SDimitry Andric // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and 14105f757f3fSDimitry Andric // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui 1411fe6060f1SDimitry Andric // are candidate pairs that can be merged. 1412fe6060f1SDimitry Andric if (isPreLdStPairCandidate(FirstMI, MI)) 1413fe6060f1SDimitry Andric return true; 1414fe6060f1SDimitry Andric 14150b57cec5SDimitry Andric // Try to match an unscaled load/store with a scaled load/store. 1416fe6060f1SDimitry Andric return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) && 14170b57cec5SDimitry Andric getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB); 14180b57cec5SDimitry Andric 14190b57cec5SDimitry Andric // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair? 
14200b57cec5SDimitry Andric } 14210b57cec5SDimitry Andric 14225f757f3fSDimitry Andric static bool canRenameMOP(const MachineOperand &MOP, 1423480093f4SDimitry Andric const TargetRegisterInfo *TRI) { 14245ffd83dbSDimitry Andric if (MOP.isReg()) { 14255ffd83dbSDimitry Andric auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg()); 14265ffd83dbSDimitry Andric // Renaming registers with multiple disjunct sub-registers (e.g. the 14275ffd83dbSDimitry Andric // result of a LD3) means that all sub-registers are renamed, potentially 14285ffd83dbSDimitry Andric // impacting other instructions we did not check. Bail out. 14295ffd83dbSDimitry Andric // Note that this relies on the structure of the AArch64 register file. In 14305ffd83dbSDimitry Andric // particular, a subregister cannot be written without overwriting the 14315ffd83dbSDimitry Andric // whole register. 14325ffd83dbSDimitry Andric if (RegClass->HasDisjunctSubRegs) { 14335ffd83dbSDimitry Andric LLVM_DEBUG( 14345ffd83dbSDimitry Andric dbgs() 14355ffd83dbSDimitry Andric << " Cannot rename operands with multiple disjunct subregisters (" 14365ffd83dbSDimitry Andric << MOP << ")\n"); 14375ffd83dbSDimitry Andric return false; 14385ffd83dbSDimitry Andric } 14395f757f3fSDimitry Andric 14405f757f3fSDimitry Andric // We cannot rename arbitrary implicit-defs, the specific rule to rewrite 14415f757f3fSDimitry Andric // them must be known. For example, in ORRWrs the implicit-def 14425f757f3fSDimitry Andric // corresponds to the result register. 
14435f757f3fSDimitry Andric if (MOP.isImplicit() && MOP.isDef()) { 14445f757f3fSDimitry Andric if (!isRewritableImplicitDef(MOP.getParent()->getOpcode())) 14455f757f3fSDimitry Andric return false; 14465f757f3fSDimitry Andric return TRI->isSuperOrSubRegisterEq( 14475f757f3fSDimitry Andric MOP.getParent()->getOperand(0).getReg(), MOP.getReg()); 14485f757f3fSDimitry Andric } 14495ffd83dbSDimitry Andric } 1450480093f4SDimitry Andric return MOP.isImplicit() || 1451480093f4SDimitry Andric (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); 14525f757f3fSDimitry Andric } 14535f757f3fSDimitry Andric 14545f757f3fSDimitry Andric static bool 14555f757f3fSDimitry Andric canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, 14565f757f3fSDimitry Andric SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, 14575f757f3fSDimitry Andric const TargetRegisterInfo *TRI) { 14585f757f3fSDimitry Andric if (!FirstMI.mayStore()) 14595f757f3fSDimitry Andric return false; 14605f757f3fSDimitry Andric 14615f757f3fSDimitry Andric // Check if we can find an unused register which we can use to rename 14625f757f3fSDimitry Andric // the register used by the first load/store. 14635f757f3fSDimitry Andric 14645f757f3fSDimitry Andric auto RegToRename = getLdStRegOp(FirstMI).getReg(); 14655f757f3fSDimitry Andric // For now, we only rename if the store operand gets killed at the store. 
14665f757f3fSDimitry Andric if (!getLdStRegOp(FirstMI).isKill() && 14675f757f3fSDimitry Andric !any_of(FirstMI.operands(), 14685f757f3fSDimitry Andric [TRI, RegToRename](const MachineOperand &MOP) { 14695f757f3fSDimitry Andric return MOP.isReg() && !MOP.isDebug() && MOP.getReg() && 14705f757f3fSDimitry Andric MOP.isImplicit() && MOP.isKill() && 14715f757f3fSDimitry Andric TRI->regsOverlap(RegToRename, MOP.getReg()); 14725f757f3fSDimitry Andric })) { 14735f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI); 14745f757f3fSDimitry Andric return false; 14755f757f3fSDimitry Andric } 1476480093f4SDimitry Andric 1477480093f4SDimitry Andric bool FoundDef = false; 1478480093f4SDimitry Andric 1479480093f4SDimitry Andric // For each instruction between FirstMI and the previous def for RegToRename, 1480480093f4SDimitry Andric // we 1481480093f4SDimitry Andric // * check if we can rename RegToRename in this instruction 1482480093f4SDimitry Andric // * collect the registers used and required register classes for RegToRename. 1483480093f4SDimitry Andric std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI, 1484480093f4SDimitry Andric bool IsDef) { 14855f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Checking " << MI); 1486480093f4SDimitry Andric // Currently we do not try to rename across frame-setup instructions. 1487480093f4SDimitry Andric if (MI.getFlag(MachineInstr::FrameSetup)) { 14885f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions " 14895f757f3fSDimitry Andric << "currently\n"); 1490480093f4SDimitry Andric return false; 1491480093f4SDimitry Andric } 1492480093f4SDimitry Andric 1493480093f4SDimitry Andric UsedInBetween.accumulate(MI); 1494480093f4SDimitry Andric 1495480093f4SDimitry Andric // For a definition, check that we can rename the definition and exit the 1496480093f4SDimitry Andric // loop. 
1497480093f4SDimitry Andric FoundDef = IsDef; 1498480093f4SDimitry Andric 1499480093f4SDimitry Andric // For defs, check if we can rename the first def of RegToRename. 1500480093f4SDimitry Andric if (FoundDef) { 150155e4f9d5SDimitry Andric // For some pseudo instructions, we might not generate code in the end 150255e4f9d5SDimitry Andric // (e.g. KILL) and we would end up without a correct def for the rename 150355e4f9d5SDimitry Andric // register. 150455e4f9d5SDimitry Andric // TODO: This might be overly conservative and we could handle those cases 150555e4f9d5SDimitry Andric // in multiple ways: 150655e4f9d5SDimitry Andric // 1. Insert an extra copy, to materialize the def. 150755e4f9d5SDimitry Andric // 2. Skip pseudo-defs until we find an non-pseudo def. 150855e4f9d5SDimitry Andric if (MI.isPseudo()) { 15095f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n"); 151055e4f9d5SDimitry Andric return false; 151155e4f9d5SDimitry Andric } 151255e4f9d5SDimitry Andric 1513480093f4SDimitry Andric for (auto &MOP : MI.operands()) { 1514480093f4SDimitry Andric if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() || 1515480093f4SDimitry Andric !TRI->regsOverlap(MOP.getReg(), RegToRename)) 1516480093f4SDimitry Andric continue; 15175f757f3fSDimitry Andric if (!canRenameMOP(MOP, TRI)) { 15185f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI); 1519480093f4SDimitry Andric return false; 1520480093f4SDimitry Andric } 1521480093f4SDimitry Andric RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); 1522480093f4SDimitry Andric } 1523480093f4SDimitry Andric return true; 1524480093f4SDimitry Andric } else { 1525480093f4SDimitry Andric for (auto &MOP : MI.operands()) { 1526480093f4SDimitry Andric if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() || 1527480093f4SDimitry Andric !TRI->regsOverlap(MOP.getReg(), RegToRename)) 1528480093f4SDimitry Andric continue; 1529480093f4SDimitry Andric 
15305f757f3fSDimitry Andric if (!canRenameMOP(MOP, TRI)) { 15315f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI); 1532480093f4SDimitry Andric return false; 1533480093f4SDimitry Andric } 1534480093f4SDimitry Andric RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); 1535480093f4SDimitry Andric } 1536480093f4SDimitry Andric } 1537480093f4SDimitry Andric return true; 1538480093f4SDimitry Andric }; 1539480093f4SDimitry Andric 1540480093f4SDimitry Andric if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs)) 1541480093f4SDimitry Andric return false; 1542480093f4SDimitry Andric 1543480093f4SDimitry Andric if (!FoundDef) { 1544480093f4SDimitry Andric LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n"); 1545480093f4SDimitry Andric return false; 1546480093f4SDimitry Andric } 1547480093f4SDimitry Andric return true; 1548480093f4SDimitry Andric } 1549480093f4SDimitry Andric 15505f757f3fSDimitry Andric // We want to merge the second load into the first by rewriting the usages of 15515f757f3fSDimitry Andric // the same reg between first (incl.) and second (excl.). We don't need to care 15525f757f3fSDimitry Andric // about any insns before FirstLoad or after SecondLoad. 15535f757f3fSDimitry Andric // 1. The second load writes new value into the same reg. 15545f757f3fSDimitry Andric // - The renaming is impossible to impact later use of the reg. 15555f757f3fSDimitry Andric // - The second load always trash the value written by the first load which 15565f757f3fSDimitry Andric // means the reg must be killed before the second load. 15575f757f3fSDimitry Andric // 2. The first load must be a def for the same reg so we don't need to look 15585f757f3fSDimitry Andric // into anything before it. 
15595f757f3fSDimitry Andric static bool canRenameUntilSecondLoad( 15605f757f3fSDimitry Andric MachineInstr &FirstLoad, MachineInstr &SecondLoad, 15615f757f3fSDimitry Andric LiveRegUnits &UsedInBetween, 15625f757f3fSDimitry Andric SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, 15635f757f3fSDimitry Andric const TargetRegisterInfo *TRI) { 15645f757f3fSDimitry Andric if (FirstLoad.isPseudo()) 15655f757f3fSDimitry Andric return false; 15665f757f3fSDimitry Andric 15675f757f3fSDimitry Andric UsedInBetween.accumulate(FirstLoad); 15685f757f3fSDimitry Andric auto RegToRename = getLdStRegOp(FirstLoad).getReg(); 15695f757f3fSDimitry Andric bool Success = std::all_of( 15705f757f3fSDimitry Andric FirstLoad.getIterator(), SecondLoad.getIterator(), 15715f757f3fSDimitry Andric [&](MachineInstr &MI) { 15725f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Checking " << MI); 15735f757f3fSDimitry Andric // Currently we do not try to rename across frame-setup instructions. 15745f757f3fSDimitry Andric if (MI.getFlag(MachineInstr::FrameSetup)) { 15755f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions " 15765f757f3fSDimitry Andric << "currently\n"); 15775f757f3fSDimitry Andric return false; 15785f757f3fSDimitry Andric } 15795f757f3fSDimitry Andric 15805f757f3fSDimitry Andric for (auto &MOP : MI.operands()) { 15815f757f3fSDimitry Andric if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() || 15825f757f3fSDimitry Andric !TRI->regsOverlap(MOP.getReg(), RegToRename)) 15835f757f3fSDimitry Andric continue; 15845f757f3fSDimitry Andric if (!canRenameMOP(MOP, TRI)) { 15855f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI); 15865f757f3fSDimitry Andric return false; 15875f757f3fSDimitry Andric } 15885f757f3fSDimitry Andric RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); 15895f757f3fSDimitry Andric } 15905f757f3fSDimitry Andric 15915f757f3fSDimitry Andric return true; 15925f757f3fSDimitry Andric }); 
15935f757f3fSDimitry Andric return Success; 15945f757f3fSDimitry Andric } 15955f757f3fSDimitry Andric 159681ad6265SDimitry Andric // Check if we can find a physical register for renaming \p Reg. This register 159781ad6265SDimitry Andric // must: 159881ad6265SDimitry Andric // * not be defined already in \p DefinedInBB; DefinedInBB must contain all 159981ad6265SDimitry Andric // defined registers up to the point where the renamed register will be used, 160081ad6265SDimitry Andric // * not used in \p UsedInBetween; UsedInBetween must contain all accessed 160181ad6265SDimitry Andric // registers in the range the rename register will be used, 1602480093f4SDimitry Andric // * is available in all used register classes (checked using RequiredClasses). 1603bdd1243dSDimitry Andric static std::optional<MCPhysReg> tryToFindRegisterToRename( 160481ad6265SDimitry Andric const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, 1605480093f4SDimitry Andric LiveRegUnits &UsedInBetween, 1606480093f4SDimitry Andric SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, 1607480093f4SDimitry Andric const TargetRegisterInfo *TRI) { 160881ad6265SDimitry Andric const MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1609480093f4SDimitry Andric 1610480093f4SDimitry Andric // Checks if any sub- or super-register of PR is callee saved. 1611480093f4SDimitry Andric auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) { 1612480093f4SDimitry Andric return any_of(TRI->sub_and_superregs_inclusive(PR), 1613480093f4SDimitry Andric [&MF, TRI](MCPhysReg SubOrSuper) { 1614480093f4SDimitry Andric return TRI->isCalleeSavedPhysReg(SubOrSuper, MF); 1615480093f4SDimitry Andric }); 1616480093f4SDimitry Andric }; 1617480093f4SDimitry Andric 1618480093f4SDimitry Andric // Check if PR or one of its sub- or super-registers can be used for all 1619480093f4SDimitry Andric // required register classes. 
1620480093f4SDimitry Andric auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) { 1621480093f4SDimitry Andric return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) { 16225f757f3fSDimitry Andric return any_of( 16235f757f3fSDimitry Andric TRI->sub_and_superregs_inclusive(PR), 16245f757f3fSDimitry Andric [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); }); 1625480093f4SDimitry Andric }); 1626480093f4SDimitry Andric }; 1627480093f4SDimitry Andric 162881ad6265SDimitry Andric auto *RegClass = TRI->getMinimalPhysRegClass(Reg); 1629480093f4SDimitry Andric for (const MCPhysReg &PR : *RegClass) { 1630480093f4SDimitry Andric if (DefinedInBB.available(PR) && UsedInBetween.available(PR) && 1631480093f4SDimitry Andric !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) && 1632480093f4SDimitry Andric CanBeUsedForAllClasses(PR)) { 1633480093f4SDimitry Andric DefinedInBB.addReg(PR); 1634480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI) 1635480093f4SDimitry Andric << "\n"); 1636480093f4SDimitry Andric return {PR}; 1637480093f4SDimitry Andric } 1638480093f4SDimitry Andric } 1639480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "No rename register found from " 1640480093f4SDimitry Andric << TRI->getRegClassName(RegClass) << "\n"); 1641bdd1243dSDimitry Andric return std::nullopt; 1642480093f4SDimitry Andric } 1643480093f4SDimitry Andric 16445f757f3fSDimitry Andric // For store pairs: returns a register from FirstMI to the beginning of the 16455f757f3fSDimitry Andric // block that can be renamed. 16465f757f3fSDimitry Andric // For load pairs: returns a register from FirstMI to MI that can be renamed. 
16475f757f3fSDimitry Andric static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair( 16485f757f3fSDimitry Andric std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, 16495f757f3fSDimitry Andric Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, 16505f757f3fSDimitry Andric SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses, 16515f757f3fSDimitry Andric const TargetRegisterInfo *TRI) { 16525f757f3fSDimitry Andric std::optional<MCPhysReg> RenameReg; 16535f757f3fSDimitry Andric if (!DebugCounter::shouldExecute(RegRenamingCounter)) 16545f757f3fSDimitry Andric return RenameReg; 16555f757f3fSDimitry Andric 16565f757f3fSDimitry Andric auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); 16575f757f3fSDimitry Andric MachineFunction &MF = *FirstMI.getParent()->getParent(); 16585f757f3fSDimitry Andric if (!RegClass || !MF.getRegInfo().tracksLiveness()) 16595f757f3fSDimitry Andric return RenameReg; 16605f757f3fSDimitry Andric 16615f757f3fSDimitry Andric const bool IsLoad = FirstMI.mayLoad(); 16625f757f3fSDimitry Andric 16635f757f3fSDimitry Andric if (!MaybeCanRename) { 16645f757f3fSDimitry Andric if (IsLoad) 16655f757f3fSDimitry Andric MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween, 16665f757f3fSDimitry Andric RequiredClasses, TRI)}; 16675f757f3fSDimitry Andric else 16685f757f3fSDimitry Andric MaybeCanRename = { 16695f757f3fSDimitry Andric canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)}; 16705f757f3fSDimitry Andric } 16715f757f3fSDimitry Andric 16725f757f3fSDimitry Andric if (*MaybeCanRename) { 16735f757f3fSDimitry Andric RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween, 16745f757f3fSDimitry Andric RequiredClasses, TRI); 16755f757f3fSDimitry Andric } 16765f757f3fSDimitry Andric return RenameReg; 16775f757f3fSDimitry Andric } 16785f757f3fSDimitry Andric 16790b57cec5SDimitry Andric /// Scan the instructions looking for a load/store 
that can be combined with the 16800b57cec5SDimitry Andric /// current instruction into a wider equivalent or a load/store pair. 16810b57cec5SDimitry Andric MachineBasicBlock::iterator 16820b57cec5SDimitry Andric AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, 16830b57cec5SDimitry Andric LdStPairFlags &Flags, unsigned Limit, 16840b57cec5SDimitry Andric bool FindNarrowMerge) { 16850b57cec5SDimitry Andric MachineBasicBlock::iterator E = I->getParent()->end(); 16860b57cec5SDimitry Andric MachineBasicBlock::iterator MBBI = I; 1687480093f4SDimitry Andric MachineBasicBlock::iterator MBBIWithRenameReg; 16880b57cec5SDimitry Andric MachineInstr &FirstMI = *I; 16895ffd83dbSDimitry Andric MBBI = next_nodbg(MBBI, E); 16900b57cec5SDimitry Andric 16910b57cec5SDimitry Andric bool MayLoad = FirstMI.mayLoad(); 1692fe6060f1SDimitry Andric bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI); 16938bcb0991SDimitry Andric Register Reg = getLdStRegOp(FirstMI).getReg(); 169481ad6265SDimitry Andric Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg(); 169581ad6265SDimitry Andric int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm(); 1696480093f4SDimitry Andric int OffsetStride = IsUnscaled ? 
TII->getMemScale(FirstMI) : 1; 16970b57cec5SDimitry Andric bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); 16980b57cec5SDimitry Andric 1699bdd1243dSDimitry Andric std::optional<bool> MaybeCanRename; 170013138422SDimitry Andric if (!EnableRenaming) 170113138422SDimitry Andric MaybeCanRename = {false}; 170213138422SDimitry Andric 1703480093f4SDimitry Andric SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses; 1704480093f4SDimitry Andric LiveRegUnits UsedInBetween; 1705480093f4SDimitry Andric UsedInBetween.init(*TRI); 1706480093f4SDimitry Andric 1707480093f4SDimitry Andric Flags.clearRenameReg(); 1708480093f4SDimitry Andric 17090b57cec5SDimitry Andric // Track which register units have been modified and used between the first 17100b57cec5SDimitry Andric // insn (inclusive) and the second insn. 17110b57cec5SDimitry Andric ModifiedRegUnits.clear(); 17120b57cec5SDimitry Andric UsedRegUnits.clear(); 17130b57cec5SDimitry Andric 17140b57cec5SDimitry Andric // Remember any instructions that read/write memory between FirstMI and MI. 17150b57cec5SDimitry Andric SmallVector<MachineInstr *, 4> MemInsns; 17160b57cec5SDimitry Andric 1717297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump()); 17185ffd83dbSDimitry Andric for (unsigned Count = 0; MBBI != E && Count < Limit; 17195ffd83dbSDimitry Andric MBBI = next_nodbg(MBBI, E)) { 17200b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 1721297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump()); 17220b57cec5SDimitry Andric 1723480093f4SDimitry Andric UsedInBetween.accumulate(MI); 1724480093f4SDimitry Andric 17250b57cec5SDimitry Andric // Don't count transient instructions towards the search limit since there 17260b57cec5SDimitry Andric // may be different numbers of them if e.g. debug information is present. 
17270b57cec5SDimitry Andric if (!MI.isTransient()) 17280b57cec5SDimitry Andric ++Count; 17290b57cec5SDimitry Andric 17300b57cec5SDimitry Andric Flags.setSExtIdx(-1); 17310b57cec5SDimitry Andric if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) && 173281ad6265SDimitry Andric AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) { 17330b57cec5SDimitry Andric assert(MI.mayLoadOrStore() && "Expected memory operation."); 17340b57cec5SDimitry Andric // If we've found another instruction with the same opcode, check to see 17350b57cec5SDimitry Andric // if the base and offset are compatible with our starting instruction. 17360b57cec5SDimitry Andric // These instructions all have scaled immediate operands, so we just 17370b57cec5SDimitry Andric // check for +1/-1. Make sure to check the new instruction offset is 17380b57cec5SDimitry Andric // actually an immediate and not a symbolic reference destined for 17390b57cec5SDimitry Andric // a relocation. 174081ad6265SDimitry Andric Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg(); 174181ad6265SDimitry Andric int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm(); 1742fe6060f1SDimitry Andric bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI); 17430b57cec5SDimitry Andric if (IsUnscaled != MIIsUnscaled) { 17440b57cec5SDimitry Andric // We're trying to pair instructions that differ in how they are scaled. 17450b57cec5SDimitry Andric // If FirstMI is scaled then scale the offset of MI accordingly. 17460b57cec5SDimitry Andric // Otherwise, do the opposite (i.e., make MI's offset unscaled). 1747480093f4SDimitry Andric int MemSize = TII->getMemScale(MI); 17480b57cec5SDimitry Andric if (MIIsUnscaled) { 17490b57cec5SDimitry Andric // If the unscaled offset isn't a multiple of the MemSize, we can't 17500b57cec5SDimitry Andric // pair the operations together: bail and keep looking. 
17510b57cec5SDimitry Andric if (MIOffset % MemSize) { 17520b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, 17530b57cec5SDimitry Andric UsedRegUnits, TRI); 17540b57cec5SDimitry Andric MemInsns.push_back(&MI); 17550b57cec5SDimitry Andric continue; 17560b57cec5SDimitry Andric } 17570b57cec5SDimitry Andric MIOffset /= MemSize; 17580b57cec5SDimitry Andric } else { 17590b57cec5SDimitry Andric MIOffset *= MemSize; 17600b57cec5SDimitry Andric } 17610b57cec5SDimitry Andric } 17620b57cec5SDimitry Andric 1763fe6060f1SDimitry Andric bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI); 1764fe6060f1SDimitry Andric 1765fe6060f1SDimitry Andric if (BaseReg == MIBaseReg) { 1766fe6060f1SDimitry Andric // If the offset of the second ld/st is not equal to the size of the 1767fe6060f1SDimitry Andric // destination register it can’t be paired with a pre-index ld/st 1768fe6060f1SDimitry Andric // pair. Additionally if the base reg is used or modified the operations 1769fe6060f1SDimitry Andric // can't be paired: bail and keep looking. 1770fe6060f1SDimitry Andric if (IsPreLdSt) { 1771fe6060f1SDimitry Andric bool IsOutOfBounds = MIOffset != TII->getMemScale(MI); 177281ad6265SDimitry Andric bool IsBaseRegUsed = !UsedRegUnits.available( 177381ad6265SDimitry Andric AArch64InstrInfo::getLdStBaseOp(MI).getReg()); 177481ad6265SDimitry Andric bool IsBaseRegModified = !ModifiedRegUnits.available( 177581ad6265SDimitry Andric AArch64InstrInfo::getLdStBaseOp(MI).getReg()); 1776fe6060f1SDimitry Andric // If the stored value and the address of the second instruction is 1777fe6060f1SDimitry Andric // the same, it needs to be using the updated register and therefore 1778fe6060f1SDimitry Andric // it must not be folded. 
177981ad6265SDimitry Andric bool IsMIRegTheSame = 178081ad6265SDimitry Andric TRI->regsOverlap(getLdStRegOp(MI).getReg(), 178181ad6265SDimitry Andric AArch64InstrInfo::getLdStBaseOp(MI).getReg()); 1782fe6060f1SDimitry Andric if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified || 1783fe6060f1SDimitry Andric IsMIRegTheSame) { 1784fe6060f1SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, 1785fe6060f1SDimitry Andric UsedRegUnits, TRI); 1786fe6060f1SDimitry Andric MemInsns.push_back(&MI); 1787fe6060f1SDimitry Andric continue; 1788fe6060f1SDimitry Andric } 1789fe6060f1SDimitry Andric } else { 1790fe6060f1SDimitry Andric if ((Offset != MIOffset + OffsetStride) && 1791fe6060f1SDimitry Andric (Offset + OffsetStride != MIOffset)) { 1792fe6060f1SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, 1793fe6060f1SDimitry Andric UsedRegUnits, TRI); 1794fe6060f1SDimitry Andric MemInsns.push_back(&MI); 1795fe6060f1SDimitry Andric continue; 1796fe6060f1SDimitry Andric } 1797fe6060f1SDimitry Andric } 1798fe6060f1SDimitry Andric 17990b57cec5SDimitry Andric int MinOffset = Offset < MIOffset ? Offset : MIOffset; 18000b57cec5SDimitry Andric if (FindNarrowMerge) { 18010b57cec5SDimitry Andric // If the alignment requirements of the scaled wide load/store 18020b57cec5SDimitry Andric // instruction can't express the offset of the scaled narrow input, 18030b57cec5SDimitry Andric // bail and keep looking. For promotable zero stores, allow only when 18040b57cec5SDimitry Andric // the stored value is the same (i.e., WZR). 
18050b57cec5SDimitry Andric if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) || 18060b57cec5SDimitry Andric (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) { 18070b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, 18080b57cec5SDimitry Andric UsedRegUnits, TRI); 18090b57cec5SDimitry Andric MemInsns.push_back(&MI); 18100b57cec5SDimitry Andric continue; 18110b57cec5SDimitry Andric } 18120b57cec5SDimitry Andric } else { 18130b57cec5SDimitry Andric // Pairwise instructions have a 7-bit signed offset field. Single 18140b57cec5SDimitry Andric // insns have a 12-bit unsigned offset field. If the resultant 18150b57cec5SDimitry Andric // immediate offset of merging these instructions is out of range for 18160b57cec5SDimitry Andric // a pairwise instruction, bail and keep looking. 18170b57cec5SDimitry Andric if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) { 18180b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, 18190b57cec5SDimitry Andric UsedRegUnits, TRI); 18200b57cec5SDimitry Andric MemInsns.push_back(&MI); 1821297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, " 1822297eecfbSDimitry Andric << "keep looking.\n"); 18230b57cec5SDimitry Andric continue; 18240b57cec5SDimitry Andric } 18250b57cec5SDimitry Andric // If the alignment requirements of the paired (scaled) instruction 18260b57cec5SDimitry Andric // can't express the offset of the unscaled input, bail and keep 18270b57cec5SDimitry Andric // looking. 
18280b57cec5SDimitry Andric if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { 18290b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, 18300b57cec5SDimitry Andric UsedRegUnits, TRI); 18310b57cec5SDimitry Andric MemInsns.push_back(&MI); 1832297eecfbSDimitry Andric LLVM_DEBUG(dbgs() 1833297eecfbSDimitry Andric << "Offset doesn't fit due to alignment requirements, " 1834297eecfbSDimitry Andric << "keep looking.\n"); 18350b57cec5SDimitry Andric continue; 18360b57cec5SDimitry Andric } 18370b57cec5SDimitry Andric } 18380b57cec5SDimitry Andric 1839e8d8bef9SDimitry Andric // If the BaseReg has been modified, then we cannot do the optimization. 1840e8d8bef9SDimitry Andric // For example, in the following pattern 1841e8d8bef9SDimitry Andric // ldr x1 [x2] 1842e8d8bef9SDimitry Andric // ldr x2 [x3] 1843e8d8bef9SDimitry Andric // ldr x4 [x2, #8], 1844e8d8bef9SDimitry Andric // the first and third ldr cannot be converted to ldp x1, x4, [x2] 1845e8d8bef9SDimitry Andric if (!ModifiedRegUnits.available(BaseReg)) 1846e8d8bef9SDimitry Andric return E; 1847e8d8bef9SDimitry Andric 18485f757f3fSDimitry Andric const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq( 18495f757f3fSDimitry Andric Reg, getLdStRegOp(MI).getReg()); 18505f757f3fSDimitry Andric 1851297eecfbSDimitry Andric // If the Rt of the second instruction (destination register of the 1852297eecfbSDimitry Andric // load) was not modified or used between the two instructions and none 1853297eecfbSDimitry Andric // of the instructions between the second and first alias with the 1854297eecfbSDimitry Andric // second, we can combine the second into the first. 
1855297eecfbSDimitry Andric bool RtNotModified = 1856297eecfbSDimitry Andric ModifiedRegUnits.available(getLdStRegOp(MI).getReg()); 1857297eecfbSDimitry Andric bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg && 1858297eecfbSDimitry Andric !UsedRegUnits.available(getLdStRegOp(MI).getReg())); 1859297eecfbSDimitry Andric 1860297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n" 1861297eecfbSDimitry Andric << "Reg '" << getLdStRegOp(MI) << "' not modified: " 1862297eecfbSDimitry Andric << (RtNotModified ? "true" : "false") << "\n" 1863297eecfbSDimitry Andric << "Reg '" << getLdStRegOp(MI) << "' not used: " 1864297eecfbSDimitry Andric << (RtNotUsed ? "true" : "false") << "\n"); 1865297eecfbSDimitry Andric 1866297eecfbSDimitry Andric if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) { 18675f757f3fSDimitry Andric // For pairs loading into the same reg, try to find a renaming 18685f757f3fSDimitry Andric // opportunity to allow the renaming of Reg between FirstMI and MI 18695f757f3fSDimitry Andric // and combine MI into FirstMI; otherwise bail and keep looking. 
18705f757f3fSDimitry Andric if (SameLoadReg) { 18715f757f3fSDimitry Andric std::optional<MCPhysReg> RenameReg = 18725f757f3fSDimitry Andric findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI, 18735f757f3fSDimitry Andric Reg, DefinedInBB, UsedInBetween, 18745f757f3fSDimitry Andric RequiredClasses, TRI); 18755f757f3fSDimitry Andric if (!RenameReg) { 18765f757f3fSDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, 18775f757f3fSDimitry Andric UsedRegUnits, TRI); 18785f757f3fSDimitry Andric MemInsns.push_back(&MI); 1879297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Can't find reg for renaming, " 1880297eecfbSDimitry Andric << "keep looking.\n"); 18815f757f3fSDimitry Andric continue; 18825f757f3fSDimitry Andric } 18835f757f3fSDimitry Andric Flags.setRenameReg(*RenameReg); 18845f757f3fSDimitry Andric } 1885480093f4SDimitry Andric 18860b57cec5SDimitry Andric Flags.setMergeForward(false); 18875f757f3fSDimitry Andric if (!SameLoadReg) 1888480093f4SDimitry Andric Flags.clearRenameReg(); 18890b57cec5SDimitry Andric return MBBI; 18900b57cec5SDimitry Andric } 18910b57cec5SDimitry Andric 18920b57cec5SDimitry Andric // Likewise, if the Rt of the first instruction is not modified or used 18930b57cec5SDimitry Andric // between the two instructions and none of the instructions between the 18940b57cec5SDimitry Andric // first and the second alias with the first, we can combine the first 18950b57cec5SDimitry Andric // into the second. 1896297eecfbSDimitry Andric RtNotModified = !( 1897297eecfbSDimitry Andric MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())); 1898480093f4SDimitry Andric 1899297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n" 1900297eecfbSDimitry Andric << "Reg '" << getLdStRegOp(FirstMI) 1901297eecfbSDimitry Andric << "' not modified: " 1902297eecfbSDimitry Andric << (RtNotModified ? 
"true" : "false") << "\n"); 1903297eecfbSDimitry Andric 1904297eecfbSDimitry Andric if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) { 1905480093f4SDimitry Andric if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { 19060b57cec5SDimitry Andric Flags.setMergeForward(true); 1907480093f4SDimitry Andric Flags.clearRenameReg(); 19080b57cec5SDimitry Andric return MBBI; 19090b57cec5SDimitry Andric } 1910480093f4SDimitry Andric 19115f757f3fSDimitry Andric std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair( 19125f757f3fSDimitry Andric MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween, 1913bdd1243dSDimitry Andric RequiredClasses, TRI); 19145f757f3fSDimitry Andric if (RenameReg) { 1915480093f4SDimitry Andric Flags.setMergeForward(true); 19165f757f3fSDimitry Andric Flags.setRenameReg(*RenameReg); 1917480093f4SDimitry Andric MBBIWithRenameReg = MBBI; 1918480093f4SDimitry Andric } 1919480093f4SDimitry Andric } 1920297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to " 1921297eecfbSDimitry Andric << "interference in between, keep looking.\n"); 19220b57cec5SDimitry Andric } 19230b57cec5SDimitry Andric } 19240b57cec5SDimitry Andric 1925480093f4SDimitry Andric if (Flags.getRenameReg()) 1926480093f4SDimitry Andric return MBBIWithRenameReg; 1927480093f4SDimitry Andric 19280b57cec5SDimitry Andric // If the instruction wasn't a matching load or store. Stop searching if we 19290b57cec5SDimitry Andric // encounter a call instruction that might modify memory. 1930297eecfbSDimitry Andric if (MI.isCall()) { 1931297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n"); 19320b57cec5SDimitry Andric return E; 1933297eecfbSDimitry Andric } 19340b57cec5SDimitry Andric 19350b57cec5SDimitry Andric // Update modified / uses register units. 
19360b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); 19370b57cec5SDimitry Andric 19380b57cec5SDimitry Andric // Otherwise, if the base register is modified, we have no match, so 19390b57cec5SDimitry Andric // return early. 1940297eecfbSDimitry Andric if (!ModifiedRegUnits.available(BaseReg)) { 1941297eecfbSDimitry Andric LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n"); 19420b57cec5SDimitry Andric return E; 1943297eecfbSDimitry Andric } 19440b57cec5SDimitry Andric 19450b57cec5SDimitry Andric // Update list of instructions that read/write memory. 19460b57cec5SDimitry Andric if (MI.mayLoadOrStore()) 19470b57cec5SDimitry Andric MemInsns.push_back(&MI); 19480b57cec5SDimitry Andric } 19490b57cec5SDimitry Andric return E; 19500b57cec5SDimitry Andric } 19510b57cec5SDimitry Andric 195281ad6265SDimitry Andric static MachineBasicBlock::iterator 195381ad6265SDimitry Andric maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) { 195481ad6265SDimitry Andric auto End = MI.getParent()->end(); 195581ad6265SDimitry Andric if (MaybeCFI == End || 195681ad6265SDimitry Andric MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION || 195781ad6265SDimitry Andric !(MI.getFlag(MachineInstr::FrameSetup) || 195881ad6265SDimitry Andric MI.getFlag(MachineInstr::FrameDestroy)) || 195981ad6265SDimitry Andric AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP) 196081ad6265SDimitry Andric return End; 196181ad6265SDimitry Andric 196281ad6265SDimitry Andric const MachineFunction &MF = *MI.getParent()->getParent(); 196381ad6265SDimitry Andric unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex(); 196481ad6265SDimitry Andric const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex]; 196581ad6265SDimitry Andric switch (CFI.getOperation()) { 196681ad6265SDimitry Andric case MCCFIInstruction::OpDefCfa: 196781ad6265SDimitry Andric case MCCFIInstruction::OpDefCfaOffset: 196881ad6265SDimitry Andric return 
MaybeCFI; 196981ad6265SDimitry Andric default: 197081ad6265SDimitry Andric return End; 197181ad6265SDimitry Andric } 197281ad6265SDimitry Andric } 197381ad6265SDimitry Andric 19740b57cec5SDimitry Andric MachineBasicBlock::iterator 19750b57cec5SDimitry Andric AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, 19760b57cec5SDimitry Andric MachineBasicBlock::iterator Update, 19770b57cec5SDimitry Andric bool IsPreIdx) { 19780b57cec5SDimitry Andric assert((Update->getOpcode() == AArch64::ADDXri || 19790b57cec5SDimitry Andric Update->getOpcode() == AArch64::SUBXri) && 19800b57cec5SDimitry Andric "Unexpected base register update instruction to merge!"); 19815ffd83dbSDimitry Andric MachineBasicBlock::iterator E = I->getParent()->end(); 19825ffd83dbSDimitry Andric MachineBasicBlock::iterator NextI = next_nodbg(I, E); 198381ad6265SDimitry Andric 198481ad6265SDimitry Andric // If updating the SP and the following instruction is CFA offset related CFI 198581ad6265SDimitry Andric // instruction move it after the merged instruction. 198681ad6265SDimitry Andric MachineBasicBlock::iterator CFI = 198781ad6265SDimitry Andric IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E; 198881ad6265SDimitry Andric 19890b57cec5SDimitry Andric // Return the instruction following the merged instruction, which is 19900b57cec5SDimitry Andric // the instruction following our unmerged load. Unless that's the add/sub 19910b57cec5SDimitry Andric // instruction we're merging, in which case it's the one after that. 
19925ffd83dbSDimitry Andric if (NextI == Update) 19935ffd83dbSDimitry Andric NextI = next_nodbg(NextI, E); 19940b57cec5SDimitry Andric 19950b57cec5SDimitry Andric int Value = Update->getOperand(2).getImm(); 19960b57cec5SDimitry Andric assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && 19970b57cec5SDimitry Andric "Can't merge 1 << 12 offset into pre-/post-indexed load / store"); 19980b57cec5SDimitry Andric if (Update->getOpcode() == AArch64::SUBXri) 19990b57cec5SDimitry Andric Value = -Value; 20000b57cec5SDimitry Andric 20010b57cec5SDimitry Andric unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) 20020b57cec5SDimitry Andric : getPostIndexedOpcode(I->getOpcode()); 20030b57cec5SDimitry Andric MachineInstrBuilder MIB; 20048bcb0991SDimitry Andric int Scale, MinOffset, MaxOffset; 20058bcb0991SDimitry Andric getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset); 200681ad6265SDimitry Andric if (!AArch64InstrInfo::isPairedLdSt(*I)) { 20070b57cec5SDimitry Andric // Non-paired instruction. 20080b57cec5SDimitry Andric MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 20090b57cec5SDimitry Andric .add(getLdStRegOp(*Update)) 20100b57cec5SDimitry Andric .add(getLdStRegOp(*I)) 201181ad6265SDimitry Andric .add(AArch64InstrInfo::getLdStBaseOp(*I)) 20128bcb0991SDimitry Andric .addImm(Value / Scale) 20130b57cec5SDimitry Andric .setMemRefs(I->memoperands()) 20140b57cec5SDimitry Andric .setMIFlags(I->mergeFlagsWith(*Update)); 20150b57cec5SDimitry Andric } else { 20160b57cec5SDimitry Andric // Paired instruction. 
20170b57cec5SDimitry Andric MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 20180b57cec5SDimitry Andric .add(getLdStRegOp(*Update)) 20190b57cec5SDimitry Andric .add(getLdStRegOp(*I, 0)) 20200b57cec5SDimitry Andric .add(getLdStRegOp(*I, 1)) 202181ad6265SDimitry Andric .add(AArch64InstrInfo::getLdStBaseOp(*I)) 20220b57cec5SDimitry Andric .addImm(Value / Scale) 20230b57cec5SDimitry Andric .setMemRefs(I->memoperands()) 20240b57cec5SDimitry Andric .setMIFlags(I->mergeFlagsWith(*Update)); 20250b57cec5SDimitry Andric } 202681ad6265SDimitry Andric if (CFI != E) { 202781ad6265SDimitry Andric MachineBasicBlock *MBB = I->getParent(); 202881ad6265SDimitry Andric MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI); 202981ad6265SDimitry Andric } 20300b57cec5SDimitry Andric 20310b57cec5SDimitry Andric if (IsPreIdx) { 20320b57cec5SDimitry Andric ++NumPreFolded; 20330b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store."); 20340b57cec5SDimitry Andric } else { 20350b57cec5SDimitry Andric ++NumPostFolded; 20360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Creating post-indexed load/store."); 20370b57cec5SDimitry Andric } 20380b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); 20390b57cec5SDimitry Andric LLVM_DEBUG(I->print(dbgs())); 20400b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " "); 20410b57cec5SDimitry Andric LLVM_DEBUG(Update->print(dbgs())); 20420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with instruction:\n "); 20430b57cec5SDimitry Andric LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); 20440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 20450b57cec5SDimitry Andric 20460b57cec5SDimitry Andric // Erase the old instructions for the block. 
20470b57cec5SDimitry Andric I->eraseFromParent(); 20480b57cec5SDimitry Andric Update->eraseFromParent(); 20490b57cec5SDimitry Andric 20500b57cec5SDimitry Andric return NextI; 20510b57cec5SDimitry Andric } 20520b57cec5SDimitry Andric 20530b57cec5SDimitry Andric bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, 20540b57cec5SDimitry Andric MachineInstr &MI, 20550b57cec5SDimitry Andric unsigned BaseReg, int Offset) { 20560b57cec5SDimitry Andric switch (MI.getOpcode()) { 20570b57cec5SDimitry Andric default: 20580b57cec5SDimitry Andric break; 20590b57cec5SDimitry Andric case AArch64::SUBXri: 20600b57cec5SDimitry Andric case AArch64::ADDXri: 20610b57cec5SDimitry Andric // Make sure it's a vanilla immediate operand, not a relocation or 20620b57cec5SDimitry Andric // anything else we can't handle. 20630b57cec5SDimitry Andric if (!MI.getOperand(2).isImm()) 20640b57cec5SDimitry Andric break; 20650b57cec5SDimitry Andric // Watch out for 1 << 12 shifted value. 20660b57cec5SDimitry Andric if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm())) 20670b57cec5SDimitry Andric break; 20680b57cec5SDimitry Andric 20690b57cec5SDimitry Andric // The update instruction source and destination register must be the 20700b57cec5SDimitry Andric // same as the load/store base register. 20710b57cec5SDimitry Andric if (MI.getOperand(0).getReg() != BaseReg || 20720b57cec5SDimitry Andric MI.getOperand(1).getReg() != BaseReg) 20730b57cec5SDimitry Andric break; 20740b57cec5SDimitry Andric 20750b57cec5SDimitry Andric int UpdateOffset = MI.getOperand(2).getImm(); 20760b57cec5SDimitry Andric if (MI.getOpcode() == AArch64::SUBXri) 20770b57cec5SDimitry Andric UpdateOffset = -UpdateOffset; 20780b57cec5SDimitry Andric 20798bcb0991SDimitry Andric // The immediate must be a multiple of the scaling factor of the pre/post 20808bcb0991SDimitry Andric // indexed instruction. 
20818bcb0991SDimitry Andric int Scale, MinOffset, MaxOffset; 20828bcb0991SDimitry Andric getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset); 20830b57cec5SDimitry Andric if (UpdateOffset % Scale != 0) 20840b57cec5SDimitry Andric break; 20850b57cec5SDimitry Andric 20868bcb0991SDimitry Andric // Scaled offset must fit in the instruction immediate. 20870b57cec5SDimitry Andric int ScaledOffset = UpdateOffset / Scale; 20888bcb0991SDimitry Andric if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset) 20890b57cec5SDimitry Andric break; 20900b57cec5SDimitry Andric 20910b57cec5SDimitry Andric // If we have a non-zero Offset, we check that it matches the amount 20920b57cec5SDimitry Andric // we're adding to the register. 20930b57cec5SDimitry Andric if (!Offset || Offset == UpdateOffset) 20940b57cec5SDimitry Andric return true; 20950b57cec5SDimitry Andric break; 20960b57cec5SDimitry Andric } 20970b57cec5SDimitry Andric return false; 20980b57cec5SDimitry Andric } 20990b57cec5SDimitry Andric 21000b57cec5SDimitry Andric MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( 21010b57cec5SDimitry Andric MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { 21020b57cec5SDimitry Andric MachineBasicBlock::iterator E = I->getParent()->end(); 21030b57cec5SDimitry Andric MachineInstr &MemMI = *I; 21040b57cec5SDimitry Andric MachineBasicBlock::iterator MBBI = I; 21050b57cec5SDimitry Andric 210681ad6265SDimitry Andric Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); 210781ad6265SDimitry Andric int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() * 210881ad6265SDimitry Andric TII->getMemScale(MemMI); 21090b57cec5SDimitry Andric 21100b57cec5SDimitry Andric // Scan forward looking for post-index opportunities. Updating instructions 21110b57cec5SDimitry Andric // can't be formed if the memory instruction doesn't have the offset we're 21120b57cec5SDimitry Andric // looking for. 
21130b57cec5SDimitry Andric if (MIUnscaledOffset != UnscaledOffset) 21140b57cec5SDimitry Andric return E; 21150b57cec5SDimitry Andric 21168bcb0991SDimitry Andric // If the base register overlaps a source/destination register, we can't 21178bcb0991SDimitry Andric // merge the update. This does not apply to tag store instructions which 21188bcb0991SDimitry Andric // ignore the address part of the source register. 21198bcb0991SDimitry Andric // This does not apply to STGPi as well, which does not have unpredictable 21208bcb0991SDimitry Andric // behavior in this case unlike normal stores, and always performs writeback 21218bcb0991SDimitry Andric // after reading the source register value. 21228bcb0991SDimitry Andric if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) { 212381ad6265SDimitry Andric bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI); 21240b57cec5SDimitry Andric for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { 21258bcb0991SDimitry Andric Register DestReg = getLdStRegOp(MemMI, i).getReg(); 21260b57cec5SDimitry Andric if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 21270b57cec5SDimitry Andric return E; 21280b57cec5SDimitry Andric } 21298bcb0991SDimitry Andric } 21300b57cec5SDimitry Andric 21310b57cec5SDimitry Andric // Track which register units have been modified and used between the first 21320b57cec5SDimitry Andric // insn (inclusive) and the second insn. 21330b57cec5SDimitry Andric ModifiedRegUnits.clear(); 21340b57cec5SDimitry Andric UsedRegUnits.clear(); 21355ffd83dbSDimitry Andric MBBI = next_nodbg(MBBI, E); 21365ffd83dbSDimitry Andric 21375ffd83dbSDimitry Andric // We can't post-increment the stack pointer if any instruction between 21385ffd83dbSDimitry Andric // the memory access (I) and the increment (MBBI) can access the memory 21395ffd83dbSDimitry Andric // region defined by [SP, MBBI]. 
21405ffd83dbSDimitry Andric const bool BaseRegSP = BaseReg == AArch64::SP; 2141e8d8bef9SDimitry Andric if (BaseRegSP && needsWinCFI(I->getMF())) { 21425ffd83dbSDimitry Andric // FIXME: For now, we always block the optimization over SP in windows 21435ffd83dbSDimitry Andric // targets as it requires to adjust the unwind/debug info, messing up 21445ffd83dbSDimitry Andric // the unwind info can actually cause a miscompile. 21455ffd83dbSDimitry Andric return E; 21465ffd83dbSDimitry Andric } 21475ffd83dbSDimitry Andric 21485ffd83dbSDimitry Andric for (unsigned Count = 0; MBBI != E && Count < Limit; 21495ffd83dbSDimitry Andric MBBI = next_nodbg(MBBI, E)) { 21500b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 21510b57cec5SDimitry Andric 21520b57cec5SDimitry Andric // Don't count transient instructions towards the search limit since there 21530b57cec5SDimitry Andric // may be different numbers of them if e.g. debug information is present. 21540b57cec5SDimitry Andric if (!MI.isTransient()) 21550b57cec5SDimitry Andric ++Count; 21560b57cec5SDimitry Andric 21570b57cec5SDimitry Andric // If we found a match, return it. 21580b57cec5SDimitry Andric if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset)) 21590b57cec5SDimitry Andric return MBBI; 21600b57cec5SDimitry Andric 21610b57cec5SDimitry Andric // Update the status of what the instruction clobbered and used. 21620b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); 21630b57cec5SDimitry Andric 21640b57cec5SDimitry Andric // Otherwise, if the base register is used or modified, we have no match, so 21650b57cec5SDimitry Andric // return early. 21665ffd83dbSDimitry Andric // If we are optimizing SP, do not allow instructions that may load or store 21675ffd83dbSDimitry Andric // in between the load and the optimized value update. 
21680b57cec5SDimitry Andric if (!ModifiedRegUnits.available(BaseReg) || 21695ffd83dbSDimitry Andric !UsedRegUnits.available(BaseReg) || 21705ffd83dbSDimitry Andric (BaseRegSP && MBBI->mayLoadOrStore())) 21710b57cec5SDimitry Andric return E; 21720b57cec5SDimitry Andric } 21730b57cec5SDimitry Andric return E; 21740b57cec5SDimitry Andric } 21750b57cec5SDimitry Andric 21760b57cec5SDimitry Andric MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( 21770b57cec5SDimitry Andric MachineBasicBlock::iterator I, unsigned Limit) { 21780b57cec5SDimitry Andric MachineBasicBlock::iterator B = I->getParent()->begin(); 21790b57cec5SDimitry Andric MachineBasicBlock::iterator E = I->getParent()->end(); 21800b57cec5SDimitry Andric MachineInstr &MemMI = *I; 21810b57cec5SDimitry Andric MachineBasicBlock::iterator MBBI = I; 2182fe6060f1SDimitry Andric MachineFunction &MF = *MemMI.getMF(); 21830b57cec5SDimitry Andric 218481ad6265SDimitry Andric Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg(); 218581ad6265SDimitry Andric int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm(); 21860b57cec5SDimitry Andric 21870b57cec5SDimitry Andric // If the load/store is the first instruction in the block, there's obviously 21880b57cec5SDimitry Andric // not any matching update. Ditto if the memory offset isn't zero. 21890b57cec5SDimitry Andric if (MBBI == B || Offset != 0) 21900b57cec5SDimitry Andric return E; 21910b57cec5SDimitry Andric // If the base register overlaps a destination register, we can't 21920b57cec5SDimitry Andric // merge the update. 21938bcb0991SDimitry Andric if (!isTagStore(MemMI)) { 219481ad6265SDimitry Andric bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI); 21950b57cec5SDimitry Andric for (unsigned i = 0, e = IsPairedInsn ? 
2 : 1; i != e; ++i) { 21968bcb0991SDimitry Andric Register DestReg = getLdStRegOp(MemMI, i).getReg(); 21970b57cec5SDimitry Andric if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 21980b57cec5SDimitry Andric return E; 21990b57cec5SDimitry Andric } 22008bcb0991SDimitry Andric } 22010b57cec5SDimitry Andric 2202e8d8bef9SDimitry Andric const bool BaseRegSP = BaseReg == AArch64::SP; 2203e8d8bef9SDimitry Andric if (BaseRegSP && needsWinCFI(I->getMF())) { 2204e8d8bef9SDimitry Andric // FIXME: For now, we always block the optimization over SP in windows 2205e8d8bef9SDimitry Andric // targets as it requires to adjust the unwind/debug info, messing up 2206e8d8bef9SDimitry Andric // the unwind info can actually cause a miscompile. 2207e8d8bef9SDimitry Andric return E; 2208e8d8bef9SDimitry Andric } 2209e8d8bef9SDimitry Andric 2210fe6060f1SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 2211fe6060f1SDimitry Andric unsigned RedZoneSize = 2212fe6060f1SDimitry Andric Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction()); 2213fe6060f1SDimitry Andric 22140b57cec5SDimitry Andric // Track which register units have been modified and used between the first 22150b57cec5SDimitry Andric // insn (inclusive) and the second insn. 22160b57cec5SDimitry Andric ModifiedRegUnits.clear(); 22170b57cec5SDimitry Andric UsedRegUnits.clear(); 22180b57cec5SDimitry Andric unsigned Count = 0; 2219fe6060f1SDimitry Andric bool MemAcessBeforeSPPreInc = false; 22200b57cec5SDimitry Andric do { 22215ffd83dbSDimitry Andric MBBI = prev_nodbg(MBBI, B); 22220b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 22230b57cec5SDimitry Andric 22240b57cec5SDimitry Andric // Don't count transient instructions towards the search limit since there 22250b57cec5SDimitry Andric // may be different numbers of them if e.g. debug information is present. 
22260b57cec5SDimitry Andric if (!MI.isTransient()) 22270b57cec5SDimitry Andric ++Count; 22280b57cec5SDimitry Andric 22290b57cec5SDimitry Andric // If we found a match, return it. 2230fe6060f1SDimitry Andric if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) { 2231fe6060f1SDimitry Andric // Check that the update value is within our red zone limit (which may be 2232fe6060f1SDimitry Andric // zero). 2233fe6060f1SDimitry Andric if (MemAcessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize) 2234fe6060f1SDimitry Andric return E; 22350b57cec5SDimitry Andric return MBBI; 2236fe6060f1SDimitry Andric } 22370b57cec5SDimitry Andric 22380b57cec5SDimitry Andric // Update the status of what the instruction clobbered and used. 22390b57cec5SDimitry Andric LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); 22400b57cec5SDimitry Andric 22410b57cec5SDimitry Andric // Otherwise, if the base register is used or modified, we have no match, so 22420b57cec5SDimitry Andric // return early. 22430b57cec5SDimitry Andric if (!ModifiedRegUnits.available(BaseReg) || 22440b57cec5SDimitry Andric !UsedRegUnits.available(BaseReg)) 22450b57cec5SDimitry Andric return E; 2246fe6060f1SDimitry Andric // Keep track if we have a memory access before an SP pre-increment, in this 2247fe6060f1SDimitry Andric // case we need to validate later that the update amount respects the red 2248fe6060f1SDimitry Andric // zone. 2249fe6060f1SDimitry Andric if (BaseRegSP && MBBI->mayLoadOrStore()) 2250fe6060f1SDimitry Andric MemAcessBeforeSPPreInc = true; 22510b57cec5SDimitry Andric } while (MBBI != B && Count < Limit); 22520b57cec5SDimitry Andric return E; 22530b57cec5SDimitry Andric } 22540b57cec5SDimitry Andric 22550b57cec5SDimitry Andric bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( 22560b57cec5SDimitry Andric MachineBasicBlock::iterator &MBBI) { 22570b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 22580b57cec5SDimitry Andric // If this is a volatile load, don't mess with it. 
22590b57cec5SDimitry Andric if (MI.hasOrderedMemoryRef()) 22600b57cec5SDimitry Andric return false; 22610b57cec5SDimitry Andric 2262bdd1243dSDimitry Andric if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy)) 2263bdd1243dSDimitry Andric return false; 2264bdd1243dSDimitry Andric 22650b57cec5SDimitry Andric // Make sure this is a reg+imm. 22660b57cec5SDimitry Andric // FIXME: It is possible to extend it to handle reg+reg cases. 226781ad6265SDimitry Andric if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) 22680b57cec5SDimitry Andric return false; 22690b57cec5SDimitry Andric 22700b57cec5SDimitry Andric // Look backward up to LdStLimit instructions. 22710b57cec5SDimitry Andric MachineBasicBlock::iterator StoreI; 22720b57cec5SDimitry Andric if (findMatchingStore(MBBI, LdStLimit, StoreI)) { 22730b57cec5SDimitry Andric ++NumLoadsFromStoresPromoted; 22740b57cec5SDimitry Andric // Promote the load. Keeping the iterator straight is a 22750b57cec5SDimitry Andric // pain, so we let the merge routine tell us what the next instruction 22760b57cec5SDimitry Andric // is after it's done mucking about. 22770b57cec5SDimitry Andric MBBI = promoteLoadFromStore(MBBI, StoreI); 22780b57cec5SDimitry Andric return true; 22790b57cec5SDimitry Andric } 22800b57cec5SDimitry Andric return false; 22810b57cec5SDimitry Andric } 22820b57cec5SDimitry Andric 22830b57cec5SDimitry Andric // Merge adjacent zero stores into a wider store. 
22840b57cec5SDimitry Andric bool AArch64LoadStoreOpt::tryToMergeZeroStInst( 22850b57cec5SDimitry Andric MachineBasicBlock::iterator &MBBI) { 22860b57cec5SDimitry Andric assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store."); 22870b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 22880b57cec5SDimitry Andric MachineBasicBlock::iterator E = MI.getParent()->end(); 22890b57cec5SDimitry Andric 22900b57cec5SDimitry Andric if (!TII->isCandidateToMergeOrPair(MI)) 22910b57cec5SDimitry Andric return false; 22920b57cec5SDimitry Andric 22930b57cec5SDimitry Andric // Look ahead up to LdStLimit instructions for a mergable instruction. 22940b57cec5SDimitry Andric LdStPairFlags Flags; 22950b57cec5SDimitry Andric MachineBasicBlock::iterator MergeMI = 22960b57cec5SDimitry Andric findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true); 22970b57cec5SDimitry Andric if (MergeMI != E) { 22980b57cec5SDimitry Andric ++NumZeroStoresPromoted; 22990b57cec5SDimitry Andric 23000b57cec5SDimitry Andric // Keeping the iterator straight is a pain, so we let the merge routine tell 23010b57cec5SDimitry Andric // us what the next instruction is after it's done mucking about. 23020b57cec5SDimitry Andric MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags); 23030b57cec5SDimitry Andric return true; 23040b57cec5SDimitry Andric } 23050b57cec5SDimitry Andric return false; 23060b57cec5SDimitry Andric } 23070b57cec5SDimitry Andric 23080b57cec5SDimitry Andric // Find loads and stores that can be merged into a single load or store pair 23090b57cec5SDimitry Andric // instruction. 
23100b57cec5SDimitry Andric bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { 23110b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 23120b57cec5SDimitry Andric MachineBasicBlock::iterator E = MI.getParent()->end(); 23130b57cec5SDimitry Andric 23140b57cec5SDimitry Andric if (!TII->isCandidateToMergeOrPair(MI)) 23150b57cec5SDimitry Andric return false; 23160b57cec5SDimitry Andric 23175f757f3fSDimitry Andric // If disable-ldp feature is opted, do not emit ldp. 23185f757f3fSDimitry Andric if (MI.mayLoad() && Subtarget->hasDisableLdp()) 23195f757f3fSDimitry Andric return false; 23205f757f3fSDimitry Andric 23215f757f3fSDimitry Andric // If disable-stp feature is opted, do not emit stp. 23225f757f3fSDimitry Andric if (MI.mayStore() && Subtarget->hasDisableStp()) 23235f757f3fSDimitry Andric return false; 23245f757f3fSDimitry Andric 23250b57cec5SDimitry Andric // Early exit if the offset is not possible to match. (6 bits of positive 23260b57cec5SDimitry Andric // range, plus allow an extra one in case we find a later insn that matches 23270b57cec5SDimitry Andric // with Offset-1) 2328fe6060f1SDimitry Andric bool IsUnscaled = TII->hasUnscaledLdStOffset(MI); 232981ad6265SDimitry Andric int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm(); 2330480093f4SDimitry Andric int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1; 23310b57cec5SDimitry Andric // Allow one more for offset. 23320b57cec5SDimitry Andric if (Offset > 0) 23330b57cec5SDimitry Andric Offset -= OffsetStride; 23340b57cec5SDimitry Andric if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) 23350b57cec5SDimitry Andric return false; 23360b57cec5SDimitry Andric 23370b57cec5SDimitry Andric // Look ahead up to LdStLimit instructions for a pairable instruction. 
23380b57cec5SDimitry Andric LdStPairFlags Flags; 23390b57cec5SDimitry Andric MachineBasicBlock::iterator Paired = 23400b57cec5SDimitry Andric findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false); 23410b57cec5SDimitry Andric if (Paired != E) { 23420b57cec5SDimitry Andric // Keeping the iterator straight is a pain, so we let the merge routine tell 23430b57cec5SDimitry Andric // us what the next instruction is after it's done mucking about. 2344480093f4SDimitry Andric auto Prev = std::prev(MBBI); 23455f757f3fSDimitry Andric 23465f757f3fSDimitry Andric // Fetch the memoperand of the load/store that is a candidate for 23475f757f3fSDimitry Andric // combination. 23485f757f3fSDimitry Andric MachineMemOperand *MemOp = 23495f757f3fSDimitry Andric MI.memoperands_empty() ? nullptr : MI.memoperands().front(); 23505f757f3fSDimitry Andric 2351*0fca6ea1SDimitry Andric // If a load/store arrives and ldp/stp-aligned-only feature is opted, check 2352*0fca6ea1SDimitry Andric // that the alignment of the source pointer is at least double the alignment 2353*0fca6ea1SDimitry Andric // of the type. 2354*0fca6ea1SDimitry Andric if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) || 2355*0fca6ea1SDimitry Andric (MI.mayStore() && Subtarget->hasStpAlignedOnly())) { 2356*0fca6ea1SDimitry Andric // If there is no size/align information, cancel the transformation. 2357*0fca6ea1SDimitry Andric if (!MemOp || !MemOp->getMemoryType().isValid()) { 2358*0fca6ea1SDimitry Andric NumFailedAlignmentCheck++; 2359*0fca6ea1SDimitry Andric return false; 2360*0fca6ea1SDimitry Andric } 2361*0fca6ea1SDimitry Andric 23625f757f3fSDimitry Andric // Get the needed alignments to check them if 23635f757f3fSDimitry Andric // ldp-aligned-only/stp-aligned-only features are opted. 
2364*0fca6ea1SDimitry Andric uint64_t MemAlignment = MemOp->getAlign().value(); 2365*0fca6ea1SDimitry Andric uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value(); 23665f757f3fSDimitry Andric 2367*0fca6ea1SDimitry Andric if (MemAlignment < 2 * TypeAlignment) { 2368*0fca6ea1SDimitry Andric NumFailedAlignmentCheck++; 23695f757f3fSDimitry Andric return false; 2370*0fca6ea1SDimitry Andric } 2371*0fca6ea1SDimitry Andric } 23725f757f3fSDimitry Andric 2373*0fca6ea1SDimitry Andric ++NumPairCreated; 2374*0fca6ea1SDimitry Andric if (TII->hasUnscaledLdStOffset(MI)) 2375*0fca6ea1SDimitry Andric ++NumUnscaledPairCreated; 23765f757f3fSDimitry Andric 23770b57cec5SDimitry Andric MBBI = mergePairedInsns(MBBI, Paired, Flags); 2378480093f4SDimitry Andric // Collect liveness info for instructions between Prev and the new position 2379480093f4SDimitry Andric // MBBI. 2380480093f4SDimitry Andric for (auto I = std::next(Prev); I != MBBI; I++) 2381480093f4SDimitry Andric updateDefinedRegisters(*I, DefinedInBB, TRI); 2382480093f4SDimitry Andric 23830b57cec5SDimitry Andric return true; 23840b57cec5SDimitry Andric } 23850b57cec5SDimitry Andric return false; 23860b57cec5SDimitry Andric } 23870b57cec5SDimitry Andric 23880b57cec5SDimitry Andric bool AArch64LoadStoreOpt::tryToMergeLdStUpdate 23890b57cec5SDimitry Andric (MachineBasicBlock::iterator &MBBI) { 23900b57cec5SDimitry Andric MachineInstr &MI = *MBBI; 23910b57cec5SDimitry Andric MachineBasicBlock::iterator E = MI.getParent()->end(); 23920b57cec5SDimitry Andric MachineBasicBlock::iterator Update; 23930b57cec5SDimitry Andric 23940b57cec5SDimitry Andric // Look forward to try to form a post-index instruction. 
For example, 23950b57cec5SDimitry Andric // ldr x0, [x20] 23960b57cec5SDimitry Andric // add x20, x20, #32 23970b57cec5SDimitry Andric // merged into: 23980b57cec5SDimitry Andric // ldr x0, [x20], #32 23990b57cec5SDimitry Andric Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit); 24000b57cec5SDimitry Andric if (Update != E) { 24010b57cec5SDimitry Andric // Merge the update into the ld/st. 24020b57cec5SDimitry Andric MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false); 24030b57cec5SDimitry Andric return true; 24040b57cec5SDimitry Andric } 24050b57cec5SDimitry Andric 24060b57cec5SDimitry Andric // Don't know how to handle unscaled pre/post-index versions below, so bail. 2407fe6060f1SDimitry Andric if (TII->hasUnscaledLdStOffset(MI.getOpcode())) 24080b57cec5SDimitry Andric return false; 24090b57cec5SDimitry Andric 24100b57cec5SDimitry Andric // Look back to try to find a pre-index instruction. For example, 24110b57cec5SDimitry Andric // add x0, x0, #8 24120b57cec5SDimitry Andric // ldr x1, [x0] 24130b57cec5SDimitry Andric // merged into: 24140b57cec5SDimitry Andric // ldr x1, [x0, #8]! 24150b57cec5SDimitry Andric Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit); 24160b57cec5SDimitry Andric if (Update != E) { 24170b57cec5SDimitry Andric // Merge the update into the ld/st. 24180b57cec5SDimitry Andric MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); 24190b57cec5SDimitry Andric return true; 24200b57cec5SDimitry Andric } 24210b57cec5SDimitry Andric 24220b57cec5SDimitry Andric // The immediate in the load/store is scaled by the size of the memory 24230b57cec5SDimitry Andric // operation. The immediate in the add we're looking for, 24240b57cec5SDimitry Andric // however, is not, so adjust here. 
242581ad6265SDimitry Andric int UnscaledOffset = 242681ad6265SDimitry Andric AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI); 24270b57cec5SDimitry Andric 24288bcb0991SDimitry Andric // Look forward to try to find a pre-index instruction. For example, 24290b57cec5SDimitry Andric // ldr x1, [x0, #64] 24300b57cec5SDimitry Andric // add x0, x0, #64 24310b57cec5SDimitry Andric // merged into: 24320b57cec5SDimitry Andric // ldr x1, [x0, #64]! 24330b57cec5SDimitry Andric Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit); 24340b57cec5SDimitry Andric if (Update != E) { 24350b57cec5SDimitry Andric // Merge the update into the ld/st. 24360b57cec5SDimitry Andric MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); 24370b57cec5SDimitry Andric return true; 24380b57cec5SDimitry Andric } 24390b57cec5SDimitry Andric 24400b57cec5SDimitry Andric return false; 24410b57cec5SDimitry Andric } 24420b57cec5SDimitry Andric 24430b57cec5SDimitry Andric bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, 24440b57cec5SDimitry Andric bool EnableNarrowZeroStOpt) { 2445480093f4SDimitry Andric 24460b57cec5SDimitry Andric bool Modified = false; 24470b57cec5SDimitry Andric // Four tranformations to do here: 24480b57cec5SDimitry Andric // 1) Find loads that directly read from stores and promote them by 24490b57cec5SDimitry Andric // replacing with mov instructions. If the store is wider than the load, 24500b57cec5SDimitry Andric // the load will be replaced with a bitfield extract. 
24510b57cec5SDimitry Andric // e.g., 24520b57cec5SDimitry Andric // str w1, [x0, #4] 24530b57cec5SDimitry Andric // ldrh w2, [x0, #6] 24540b57cec5SDimitry Andric // ; becomes 24550b57cec5SDimitry Andric // str w1, [x0, #4] 24560b57cec5SDimitry Andric // lsr w2, w1, #16 24570b57cec5SDimitry Andric for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 24580b57cec5SDimitry Andric MBBI != E;) { 24590b57cec5SDimitry Andric if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI)) 24600b57cec5SDimitry Andric Modified = true; 24610b57cec5SDimitry Andric else 24620b57cec5SDimitry Andric ++MBBI; 24630b57cec5SDimitry Andric } 24640b57cec5SDimitry Andric // 2) Merge adjacent zero stores into a wider store. 24650b57cec5SDimitry Andric // e.g., 24660b57cec5SDimitry Andric // strh wzr, [x0] 24670b57cec5SDimitry Andric // strh wzr, [x0, #2] 24680b57cec5SDimitry Andric // ; becomes 24690b57cec5SDimitry Andric // str wzr, [x0] 24700b57cec5SDimitry Andric // e.g., 24710b57cec5SDimitry Andric // str wzr, [x0] 24720b57cec5SDimitry Andric // str wzr, [x0, #4] 24730b57cec5SDimitry Andric // ; becomes 24740b57cec5SDimitry Andric // str xzr, [x0] 24750b57cec5SDimitry Andric if (EnableNarrowZeroStOpt) 24760b57cec5SDimitry Andric for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 24770b57cec5SDimitry Andric MBBI != E;) { 24780b57cec5SDimitry Andric if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI)) 24790b57cec5SDimitry Andric Modified = true; 24800b57cec5SDimitry Andric else 24810b57cec5SDimitry Andric ++MBBI; 24820b57cec5SDimitry Andric } 24830b57cec5SDimitry Andric // 3) Find loads and stores that can be merged into a single load or store 24840b57cec5SDimitry Andric // pair instruction. 
24850b57cec5SDimitry Andric // e.g., 24860b57cec5SDimitry Andric // ldr x0, [x2] 24870b57cec5SDimitry Andric // ldr x1, [x2, #8] 24880b57cec5SDimitry Andric // ; becomes 24890b57cec5SDimitry Andric // ldp x0, x1, [x2] 2490480093f4SDimitry Andric 2491480093f4SDimitry Andric if (MBB.getParent()->getRegInfo().tracksLiveness()) { 2492480093f4SDimitry Andric DefinedInBB.clear(); 2493480093f4SDimitry Andric DefinedInBB.addLiveIns(MBB); 2494480093f4SDimitry Andric } 2495480093f4SDimitry Andric 24960b57cec5SDimitry Andric for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 24970b57cec5SDimitry Andric MBBI != E;) { 2498480093f4SDimitry Andric // Track currently live registers up to this point, to help with 2499480093f4SDimitry Andric // searching for a rename register on demand. 2500480093f4SDimitry Andric updateDefinedRegisters(*MBBI, DefinedInBB, TRI); 25010b57cec5SDimitry Andric if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI)) 25020b57cec5SDimitry Andric Modified = true; 25030b57cec5SDimitry Andric else 25040b57cec5SDimitry Andric ++MBBI; 25050b57cec5SDimitry Andric } 25060b57cec5SDimitry Andric // 4) Find base register updates that can be merged into the load or store 25070b57cec5SDimitry Andric // as a base-reg writeback. 
25080b57cec5SDimitry Andric // e.g., 25090b57cec5SDimitry Andric // ldr x0, [x2] 25100b57cec5SDimitry Andric // add x2, x2, #4 25110b57cec5SDimitry Andric // ; becomes 25120b57cec5SDimitry Andric // ldr x0, [x2], #4 25130b57cec5SDimitry Andric for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 25140b57cec5SDimitry Andric MBBI != E;) { 25150b57cec5SDimitry Andric if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI)) 25160b57cec5SDimitry Andric Modified = true; 25170b57cec5SDimitry Andric else 25180b57cec5SDimitry Andric ++MBBI; 25190b57cec5SDimitry Andric } 25200b57cec5SDimitry Andric 25210b57cec5SDimitry Andric return Modified; 25220b57cec5SDimitry Andric } 25230b57cec5SDimitry Andric 25240b57cec5SDimitry Andric bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 25250b57cec5SDimitry Andric if (skipFunction(Fn.getFunction())) 25260b57cec5SDimitry Andric return false; 25270b57cec5SDimitry Andric 252881ad6265SDimitry Andric Subtarget = &Fn.getSubtarget<AArch64Subtarget>(); 25290b57cec5SDimitry Andric TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo()); 25300b57cec5SDimitry Andric TRI = Subtarget->getRegisterInfo(); 25310b57cec5SDimitry Andric AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 25320b57cec5SDimitry Andric 25330b57cec5SDimitry Andric // Resize the modified and used register unit trackers. We do this once 25340b57cec5SDimitry Andric // per function and then clear the register units each time we optimize a load 25350b57cec5SDimitry Andric // or store. 
25360b57cec5SDimitry Andric ModifiedRegUnits.init(*TRI); 25370b57cec5SDimitry Andric UsedRegUnits.init(*TRI); 2538480093f4SDimitry Andric DefinedInBB.init(*TRI); 25390b57cec5SDimitry Andric 25400b57cec5SDimitry Andric bool Modified = false; 25410b57cec5SDimitry Andric bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign(); 2542480093f4SDimitry Andric for (auto &MBB : Fn) { 2543480093f4SDimitry Andric auto M = optimizeBlock(MBB, enableNarrowZeroStOpt); 2544480093f4SDimitry Andric Modified |= M; 2545480093f4SDimitry Andric } 25460b57cec5SDimitry Andric 25470b57cec5SDimitry Andric return Modified; 25480b57cec5SDimitry Andric } 25490b57cec5SDimitry Andric 25500b57cec5SDimitry Andric // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and 25510b57cec5SDimitry Andric // stores near one another? Note: The pre-RA instruction scheduler already has 25520b57cec5SDimitry Andric // hooks to try and schedule pairable loads/stores together to improve pairing 25530b57cec5SDimitry Andric // opportunities. Thus, pre-RA pairing pass may not be worth the effort. 25540b57cec5SDimitry Andric 25550b57cec5SDimitry Andric // FIXME: When pairing store instructions it's very possible for this pass to 25560b57cec5SDimitry Andric // hoist a store with a KILL marker above another use (without a KILL marker). 25570b57cec5SDimitry Andric // The resulting IR is invalid, but nothing uses the KILL markers after this 25580b57cec5SDimitry Andric // pass, so it's never caused a problem in practice. 25590b57cec5SDimitry Andric 25600b57cec5SDimitry Andric /// createAArch64LoadStoreOptimizationPass - returns an instance of the 25610b57cec5SDimitry Andric /// load / store optimization pass. 25620b57cec5SDimitry Andric FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { 25630b57cec5SDimitry Andric return new AArch64LoadStoreOpt(); 25640b57cec5SDimitry Andric } 2565