xref: /openbsd-src/gnu/llvm/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file contains a pass that performs load / store related peephole
1009467b48Spatrick // optimizations. This pass should be run after register allocation.
1109467b48Spatrick //
12*d415bd75Srobert // The pass runs after the PrologEpilogInserter where we emit the CFI
13*d415bd75Srobert // instructions. In order to preserve the correctness of the unwind information,
14*d415bd75Srobert // the pass should not change the order of any two instructions, one of which
15*d415bd75Srobert // has the FrameSetup/FrameDestroy flag or, alternatively, apply an ad-hoc fix
16*d415bd75Srobert // to unwind information.
17*d415bd75Srobert //
1809467b48Spatrick //===----------------------------------------------------------------------===//
1909467b48Spatrick 
2009467b48Spatrick #include "AArch64InstrInfo.h"
2173471bf0Spatrick #include "AArch64MachineFunctionInfo.h"
2209467b48Spatrick #include "AArch64Subtarget.h"
2309467b48Spatrick #include "MCTargetDesc/AArch64AddressingModes.h"
2409467b48Spatrick #include "llvm/ADT/BitVector.h"
2509467b48Spatrick #include "llvm/ADT/SmallVector.h"
2609467b48Spatrick #include "llvm/ADT/Statistic.h"
2709467b48Spatrick #include "llvm/ADT/StringRef.h"
2809467b48Spatrick #include "llvm/ADT/iterator_range.h"
2909467b48Spatrick #include "llvm/Analysis/AliasAnalysis.h"
3009467b48Spatrick #include "llvm/CodeGen/MachineBasicBlock.h"
3109467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
3209467b48Spatrick #include "llvm/CodeGen/MachineFunctionPass.h"
3309467b48Spatrick #include "llvm/CodeGen/MachineInstr.h"
3409467b48Spatrick #include "llvm/CodeGen/MachineInstrBuilder.h"
3509467b48Spatrick #include "llvm/CodeGen/MachineOperand.h"
3609467b48Spatrick #include "llvm/CodeGen/MachineRegisterInfo.h"
3709467b48Spatrick #include "llvm/CodeGen/TargetRegisterInfo.h"
3809467b48Spatrick #include "llvm/IR/DebugLoc.h"
39097a140dSpatrick #include "llvm/MC/MCAsmInfo.h"
40*d415bd75Srobert #include "llvm/MC/MCDwarf.h"
4109467b48Spatrick #include "llvm/MC/MCRegisterInfo.h"
4209467b48Spatrick #include "llvm/Pass.h"
4309467b48Spatrick #include "llvm/Support/CommandLine.h"
4409467b48Spatrick #include "llvm/Support/Debug.h"
4509467b48Spatrick #include "llvm/Support/DebugCounter.h"
4609467b48Spatrick #include "llvm/Support/ErrorHandling.h"
4709467b48Spatrick #include "llvm/Support/raw_ostream.h"
4809467b48Spatrick #include <cassert>
4909467b48Spatrick #include <cstdint>
5009467b48Spatrick #include <functional>
5109467b48Spatrick #include <iterator>
5209467b48Spatrick #include <limits>
53*d415bd75Srobert #include <optional>
5409467b48Spatrick 
5509467b48Spatrick using namespace llvm;
5609467b48Spatrick 
5709467b48Spatrick #define DEBUG_TYPE "aarch64-ldst-opt"
5809467b48Spatrick 
5909467b48Spatrick STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
6009467b48Spatrick STATISTIC(NumPostFolded, "Number of post-index updates folded");
6109467b48Spatrick STATISTIC(NumPreFolded, "Number of pre-index updates folded");
6209467b48Spatrick STATISTIC(NumUnscaledPairCreated,
6309467b48Spatrick           "Number of load/store from unscaled generated");
6409467b48Spatrick STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
6509467b48Spatrick STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
6609467b48Spatrick 
6709467b48Spatrick DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
6809467b48Spatrick               "Controls which pairs are considered for renaming");
6909467b48Spatrick 
7009467b48Spatrick // The LdStLimit limits how far we search for load/store pairs.
7109467b48Spatrick static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
7209467b48Spatrick                                    cl::init(20), cl::Hidden);
7309467b48Spatrick 
7409467b48Spatrick // The UpdateLimit limits how far we search for update instructions when we form
7509467b48Spatrick // pre-/post-index instructions.
7609467b48Spatrick static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
7709467b48Spatrick                                      cl::Hidden);
7809467b48Spatrick 
7909467b48Spatrick // Enable register renaming to find additional store pairing opportunities.
8009467b48Spatrick static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
81097a140dSpatrick                                     cl::init(true), cl::Hidden);
8209467b48Spatrick 
8309467b48Spatrick #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
8409467b48Spatrick 
8509467b48Spatrick namespace {
8609467b48Spatrick 
8709467b48Spatrick using LdStPairFlags = struct LdStPairFlags {
8809467b48Spatrick   // If a matching instruction is found, MergeForward is set to true if the
8909467b48Spatrick   // merge is to remove the first instruction and replace the second with
9009467b48Spatrick   // a pair-wise insn, and false if the reverse is true.
9109467b48Spatrick   bool MergeForward = false;
9209467b48Spatrick 
9309467b48Spatrick   // SExtIdx gives the index of the result of the load pair that must be
9409467b48Spatrick   // extended. The value of SExtIdx assumes that the paired load produces the
9509467b48Spatrick   // value in this order: (I, returned iterator), i.e., -1 means no value has
9609467b48Spatrick   // to be extended, 0 means I, and 1 means the returned iterator.
9709467b48Spatrick   int SExtIdx = -1;
9809467b48Spatrick 
9909467b48Spatrick   // If not none, RenameReg can be used to rename the result register of the
10009467b48Spatrick   // first store in a pair. Currently this only works when merging stores
10109467b48Spatrick   // forward.
102*d415bd75Srobert   std::optional<MCPhysReg> RenameReg;
10309467b48Spatrick 
10409467b48Spatrick   LdStPairFlags() = default;
10509467b48Spatrick 
10609467b48Spatrick   void setMergeForward(bool V = true) { MergeForward = V; }
10709467b48Spatrick   bool getMergeForward() const { return MergeForward; }
10809467b48Spatrick 
10909467b48Spatrick   void setSExtIdx(int V) { SExtIdx = V; }
11009467b48Spatrick   int getSExtIdx() const { return SExtIdx; }
11109467b48Spatrick 
11209467b48Spatrick   void setRenameReg(MCPhysReg R) { RenameReg = R; }
113*d415bd75Srobert   void clearRenameReg() { RenameReg = std::nullopt; }
114*d415bd75Srobert   std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
11509467b48Spatrick };
11609467b48Spatrick 
11709467b48Spatrick struct AArch64LoadStoreOpt : public MachineFunctionPass {
11809467b48Spatrick   static char ID;
11909467b48Spatrick 
AArch64LoadStoreOpt__anon530bd02e0111::AArch64LoadStoreOpt12009467b48Spatrick   AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
12109467b48Spatrick     initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
12209467b48Spatrick   }
12309467b48Spatrick 
12409467b48Spatrick   AliasAnalysis *AA;
12509467b48Spatrick   const AArch64InstrInfo *TII;
12609467b48Spatrick   const TargetRegisterInfo *TRI;
12709467b48Spatrick   const AArch64Subtarget *Subtarget;
12809467b48Spatrick 
12909467b48Spatrick   // Track which register units have been modified and used.
13009467b48Spatrick   LiveRegUnits ModifiedRegUnits, UsedRegUnits;
13109467b48Spatrick   LiveRegUnits DefinedInBB;
13209467b48Spatrick 
getAnalysisUsage__anon530bd02e0111::AArch64LoadStoreOpt13309467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
13409467b48Spatrick     AU.addRequired<AAResultsWrapperPass>();
13509467b48Spatrick     MachineFunctionPass::getAnalysisUsage(AU);
13609467b48Spatrick   }
13709467b48Spatrick 
13809467b48Spatrick   // Scan the instructions looking for a load/store that can be combined
13909467b48Spatrick   // with the current instruction into a load/store pair.
14009467b48Spatrick   // Return the matching instruction if one is found, else MBB->end().
14109467b48Spatrick   MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
14209467b48Spatrick                                                LdStPairFlags &Flags,
14309467b48Spatrick                                                unsigned Limit,
14409467b48Spatrick                                                bool FindNarrowMerge);
14509467b48Spatrick 
14609467b48Spatrick   // Scan the instructions looking for a store that writes to the address from
14709467b48Spatrick   // which the current load instruction reads. Return true if one is found.
14809467b48Spatrick   bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
14909467b48Spatrick                          MachineBasicBlock::iterator &StoreI);
15009467b48Spatrick 
15109467b48Spatrick   // Merge the two instructions indicated into a wider narrow store instruction.
15209467b48Spatrick   MachineBasicBlock::iterator
15309467b48Spatrick   mergeNarrowZeroStores(MachineBasicBlock::iterator I,
15409467b48Spatrick                         MachineBasicBlock::iterator MergeMI,
15509467b48Spatrick                         const LdStPairFlags &Flags);
15609467b48Spatrick 
15709467b48Spatrick   // Merge the two instructions indicated into a single pair-wise instruction.
15809467b48Spatrick   MachineBasicBlock::iterator
15909467b48Spatrick   mergePairedInsns(MachineBasicBlock::iterator I,
16009467b48Spatrick                    MachineBasicBlock::iterator Paired,
16109467b48Spatrick                    const LdStPairFlags &Flags);
16209467b48Spatrick 
16309467b48Spatrick   // Promote the load that reads directly from the address stored to.
16409467b48Spatrick   MachineBasicBlock::iterator
16509467b48Spatrick   promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
16609467b48Spatrick                        MachineBasicBlock::iterator StoreI);
16709467b48Spatrick 
16809467b48Spatrick   // Scan the instruction list to find a base register update that can
16909467b48Spatrick   // be combined with the current instruction (a load or store) using
17009467b48Spatrick   // pre or post indexed addressing with writeback. Scan forwards.
17109467b48Spatrick   MachineBasicBlock::iterator
17209467b48Spatrick   findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
17309467b48Spatrick                                 int UnscaledOffset, unsigned Limit);
17409467b48Spatrick 
17509467b48Spatrick   // Scan the instruction list to find a base register update that can
17609467b48Spatrick   // be combined with the current instruction (a load or store) using
17709467b48Spatrick   // pre or post indexed addressing with writeback. Scan backwards.
17809467b48Spatrick   MachineBasicBlock::iterator
17909467b48Spatrick   findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
18009467b48Spatrick 
18109467b48Spatrick   // Find an instruction that updates the base register of the ld/st
18209467b48Spatrick   // instruction.
18309467b48Spatrick   bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
18409467b48Spatrick                             unsigned BaseReg, int Offset);
18509467b48Spatrick 
18609467b48Spatrick   // Merge a pre- or post-index base register update into a ld/st instruction.
18709467b48Spatrick   MachineBasicBlock::iterator
18809467b48Spatrick   mergeUpdateInsn(MachineBasicBlock::iterator I,
18909467b48Spatrick                   MachineBasicBlock::iterator Update, bool IsPreIdx);
19009467b48Spatrick 
19109467b48Spatrick   // Find and merge zero store instructions.
19209467b48Spatrick   bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
19309467b48Spatrick 
19409467b48Spatrick   // Find and pair ldr/str instructions.
19509467b48Spatrick   bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
19609467b48Spatrick 
19709467b48Spatrick   // Find and promote load instructions which read directly from store.
19809467b48Spatrick   bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
19909467b48Spatrick 
20009467b48Spatrick   // Find and merge a base register updates before or after a ld/st instruction.
20109467b48Spatrick   bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
20209467b48Spatrick 
20309467b48Spatrick   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
20409467b48Spatrick 
20509467b48Spatrick   bool runOnMachineFunction(MachineFunction &Fn) override;
20609467b48Spatrick 
getRequiredProperties__anon530bd02e0111::AArch64LoadStoreOpt20709467b48Spatrick   MachineFunctionProperties getRequiredProperties() const override {
20809467b48Spatrick     return MachineFunctionProperties().set(
20909467b48Spatrick         MachineFunctionProperties::Property::NoVRegs);
21009467b48Spatrick   }
21109467b48Spatrick 
getPassName__anon530bd02e0111::AArch64LoadStoreOpt21209467b48Spatrick   StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
21309467b48Spatrick };
21409467b48Spatrick 
21509467b48Spatrick char AArch64LoadStoreOpt::ID = 0;
21609467b48Spatrick 
21709467b48Spatrick } // end anonymous namespace
21809467b48Spatrick 
21909467b48Spatrick INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
22009467b48Spatrick                 AARCH64_LOAD_STORE_OPT_NAME, false, false)
22109467b48Spatrick 
isNarrowStore(unsigned Opc)22209467b48Spatrick static bool isNarrowStore(unsigned Opc) {
22309467b48Spatrick   switch (Opc) {
22409467b48Spatrick   default:
22509467b48Spatrick     return false;
22609467b48Spatrick   case AArch64::STRBBui:
22709467b48Spatrick   case AArch64::STURBBi:
22809467b48Spatrick   case AArch64::STRHHui:
22909467b48Spatrick   case AArch64::STURHHi:
23009467b48Spatrick     return true;
23109467b48Spatrick   }
23209467b48Spatrick }
23309467b48Spatrick 
23409467b48Spatrick // These instruction set memory tag and either keep memory contents unchanged or
23509467b48Spatrick // set it to zero, ignoring the address part of the source register.
isTagStore(const MachineInstr & MI)23609467b48Spatrick static bool isTagStore(const MachineInstr &MI) {
23709467b48Spatrick   switch (MI.getOpcode()) {
23809467b48Spatrick   default:
23909467b48Spatrick     return false;
24009467b48Spatrick   case AArch64::STGOffset:
24109467b48Spatrick   case AArch64::STZGOffset:
24209467b48Spatrick   case AArch64::ST2GOffset:
24309467b48Spatrick   case AArch64::STZ2GOffset:
24409467b48Spatrick     return true;
24509467b48Spatrick   }
24609467b48Spatrick }
24709467b48Spatrick 
getMatchingNonSExtOpcode(unsigned Opc,bool * IsValidLdStrOpc=nullptr)24809467b48Spatrick static unsigned getMatchingNonSExtOpcode(unsigned Opc,
24909467b48Spatrick                                          bool *IsValidLdStrOpc = nullptr) {
25009467b48Spatrick   if (IsValidLdStrOpc)
25109467b48Spatrick     *IsValidLdStrOpc = true;
25209467b48Spatrick   switch (Opc) {
25309467b48Spatrick   default:
25409467b48Spatrick     if (IsValidLdStrOpc)
25509467b48Spatrick       *IsValidLdStrOpc = false;
25609467b48Spatrick     return std::numeric_limits<unsigned>::max();
25709467b48Spatrick   case AArch64::STRDui:
25809467b48Spatrick   case AArch64::STURDi:
25973471bf0Spatrick   case AArch64::STRDpre:
26009467b48Spatrick   case AArch64::STRQui:
26109467b48Spatrick   case AArch64::STURQi:
26273471bf0Spatrick   case AArch64::STRQpre:
26309467b48Spatrick   case AArch64::STRBBui:
26409467b48Spatrick   case AArch64::STURBBi:
26509467b48Spatrick   case AArch64::STRHHui:
26609467b48Spatrick   case AArch64::STURHHi:
26709467b48Spatrick   case AArch64::STRWui:
26873471bf0Spatrick   case AArch64::STRWpre:
26909467b48Spatrick   case AArch64::STURWi:
27009467b48Spatrick   case AArch64::STRXui:
27173471bf0Spatrick   case AArch64::STRXpre:
27209467b48Spatrick   case AArch64::STURXi:
27309467b48Spatrick   case AArch64::LDRDui:
27409467b48Spatrick   case AArch64::LDURDi:
27573471bf0Spatrick   case AArch64::LDRDpre:
27609467b48Spatrick   case AArch64::LDRQui:
27709467b48Spatrick   case AArch64::LDURQi:
27873471bf0Spatrick   case AArch64::LDRQpre:
27909467b48Spatrick   case AArch64::LDRWui:
28009467b48Spatrick   case AArch64::LDURWi:
28173471bf0Spatrick   case AArch64::LDRWpre:
28209467b48Spatrick   case AArch64::LDRXui:
28309467b48Spatrick   case AArch64::LDURXi:
28473471bf0Spatrick   case AArch64::LDRXpre:
28509467b48Spatrick   case AArch64::STRSui:
28609467b48Spatrick   case AArch64::STURSi:
28773471bf0Spatrick   case AArch64::STRSpre:
28809467b48Spatrick   case AArch64::LDRSui:
28909467b48Spatrick   case AArch64::LDURSi:
29073471bf0Spatrick   case AArch64::LDRSpre:
29109467b48Spatrick     return Opc;
29209467b48Spatrick   case AArch64::LDRSWui:
29309467b48Spatrick     return AArch64::LDRWui;
29409467b48Spatrick   case AArch64::LDURSWi:
29509467b48Spatrick     return AArch64::LDURWi;
29609467b48Spatrick   }
29709467b48Spatrick }
29809467b48Spatrick 
getMatchingWideOpcode(unsigned Opc)29909467b48Spatrick static unsigned getMatchingWideOpcode(unsigned Opc) {
30009467b48Spatrick   switch (Opc) {
30109467b48Spatrick   default:
30209467b48Spatrick     llvm_unreachable("Opcode has no wide equivalent!");
30309467b48Spatrick   case AArch64::STRBBui:
30409467b48Spatrick     return AArch64::STRHHui;
30509467b48Spatrick   case AArch64::STRHHui:
30609467b48Spatrick     return AArch64::STRWui;
30709467b48Spatrick   case AArch64::STURBBi:
30809467b48Spatrick     return AArch64::STURHHi;
30909467b48Spatrick   case AArch64::STURHHi:
31009467b48Spatrick     return AArch64::STURWi;
31109467b48Spatrick   case AArch64::STURWi:
31209467b48Spatrick     return AArch64::STURXi;
31309467b48Spatrick   case AArch64::STRWui:
31409467b48Spatrick     return AArch64::STRXui;
31509467b48Spatrick   }
31609467b48Spatrick }
31709467b48Spatrick 
getMatchingPairOpcode(unsigned Opc)31809467b48Spatrick static unsigned getMatchingPairOpcode(unsigned Opc) {
31909467b48Spatrick   switch (Opc) {
32009467b48Spatrick   default:
32109467b48Spatrick     llvm_unreachable("Opcode has no pairwise equivalent!");
32209467b48Spatrick   case AArch64::STRSui:
32309467b48Spatrick   case AArch64::STURSi:
32409467b48Spatrick     return AArch64::STPSi;
32573471bf0Spatrick   case AArch64::STRSpre:
32673471bf0Spatrick     return AArch64::STPSpre;
32709467b48Spatrick   case AArch64::STRDui:
32809467b48Spatrick   case AArch64::STURDi:
32909467b48Spatrick     return AArch64::STPDi;
33073471bf0Spatrick   case AArch64::STRDpre:
33173471bf0Spatrick     return AArch64::STPDpre;
33209467b48Spatrick   case AArch64::STRQui:
33309467b48Spatrick   case AArch64::STURQi:
33409467b48Spatrick     return AArch64::STPQi;
33573471bf0Spatrick   case AArch64::STRQpre:
33673471bf0Spatrick     return AArch64::STPQpre;
33709467b48Spatrick   case AArch64::STRWui:
33809467b48Spatrick   case AArch64::STURWi:
33909467b48Spatrick     return AArch64::STPWi;
34073471bf0Spatrick   case AArch64::STRWpre:
34173471bf0Spatrick     return AArch64::STPWpre;
34209467b48Spatrick   case AArch64::STRXui:
34309467b48Spatrick   case AArch64::STURXi:
34409467b48Spatrick     return AArch64::STPXi;
34573471bf0Spatrick   case AArch64::STRXpre:
34673471bf0Spatrick     return AArch64::STPXpre;
34709467b48Spatrick   case AArch64::LDRSui:
34809467b48Spatrick   case AArch64::LDURSi:
34909467b48Spatrick     return AArch64::LDPSi;
35073471bf0Spatrick   case AArch64::LDRSpre:
35173471bf0Spatrick     return AArch64::LDPSpre;
35209467b48Spatrick   case AArch64::LDRDui:
35309467b48Spatrick   case AArch64::LDURDi:
35409467b48Spatrick     return AArch64::LDPDi;
35573471bf0Spatrick   case AArch64::LDRDpre:
35673471bf0Spatrick     return AArch64::LDPDpre;
35709467b48Spatrick   case AArch64::LDRQui:
35809467b48Spatrick   case AArch64::LDURQi:
35909467b48Spatrick     return AArch64::LDPQi;
36073471bf0Spatrick   case AArch64::LDRQpre:
36173471bf0Spatrick     return AArch64::LDPQpre;
36209467b48Spatrick   case AArch64::LDRWui:
36309467b48Spatrick   case AArch64::LDURWi:
36409467b48Spatrick     return AArch64::LDPWi;
36573471bf0Spatrick   case AArch64::LDRWpre:
36673471bf0Spatrick     return AArch64::LDPWpre;
36709467b48Spatrick   case AArch64::LDRXui:
36809467b48Spatrick   case AArch64::LDURXi:
36909467b48Spatrick     return AArch64::LDPXi;
37073471bf0Spatrick   case AArch64::LDRXpre:
37173471bf0Spatrick     return AArch64::LDPXpre;
37209467b48Spatrick   case AArch64::LDRSWui:
37309467b48Spatrick   case AArch64::LDURSWi:
37409467b48Spatrick     return AArch64::LDPSWi;
37509467b48Spatrick   }
37609467b48Spatrick }
37709467b48Spatrick 
isMatchingStore(MachineInstr & LoadInst,MachineInstr & StoreInst)37809467b48Spatrick static unsigned isMatchingStore(MachineInstr &LoadInst,
37909467b48Spatrick                                 MachineInstr &StoreInst) {
38009467b48Spatrick   unsigned LdOpc = LoadInst.getOpcode();
38109467b48Spatrick   unsigned StOpc = StoreInst.getOpcode();
38209467b48Spatrick   switch (LdOpc) {
38309467b48Spatrick   default:
38409467b48Spatrick     llvm_unreachable("Unsupported load instruction!");
38509467b48Spatrick   case AArch64::LDRBBui:
38609467b48Spatrick     return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
38709467b48Spatrick            StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
38809467b48Spatrick   case AArch64::LDURBBi:
38909467b48Spatrick     return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
39009467b48Spatrick            StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
39109467b48Spatrick   case AArch64::LDRHHui:
39209467b48Spatrick     return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
39309467b48Spatrick            StOpc == AArch64::STRXui;
39409467b48Spatrick   case AArch64::LDURHHi:
39509467b48Spatrick     return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
39609467b48Spatrick            StOpc == AArch64::STURXi;
39709467b48Spatrick   case AArch64::LDRWui:
39809467b48Spatrick     return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
39909467b48Spatrick   case AArch64::LDURWi:
40009467b48Spatrick     return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
40109467b48Spatrick   case AArch64::LDRXui:
40209467b48Spatrick     return StOpc == AArch64::STRXui;
40309467b48Spatrick   case AArch64::LDURXi:
40409467b48Spatrick     return StOpc == AArch64::STURXi;
40509467b48Spatrick   }
40609467b48Spatrick }
40709467b48Spatrick 
getPreIndexedOpcode(unsigned Opc)40809467b48Spatrick static unsigned getPreIndexedOpcode(unsigned Opc) {
40909467b48Spatrick   // FIXME: We don't currently support creating pre-indexed loads/stores when
41009467b48Spatrick   // the load or store is the unscaled version.  If we decide to perform such an
41109467b48Spatrick   // optimization in the future the cases for the unscaled loads/stores will
41209467b48Spatrick   // need to be added here.
41309467b48Spatrick   switch (Opc) {
41409467b48Spatrick   default:
41509467b48Spatrick     llvm_unreachable("Opcode has no pre-indexed equivalent!");
41609467b48Spatrick   case AArch64::STRSui:
41709467b48Spatrick     return AArch64::STRSpre;
41809467b48Spatrick   case AArch64::STRDui:
41909467b48Spatrick     return AArch64::STRDpre;
42009467b48Spatrick   case AArch64::STRQui:
42109467b48Spatrick     return AArch64::STRQpre;
42209467b48Spatrick   case AArch64::STRBBui:
42309467b48Spatrick     return AArch64::STRBBpre;
42409467b48Spatrick   case AArch64::STRHHui:
42509467b48Spatrick     return AArch64::STRHHpre;
42609467b48Spatrick   case AArch64::STRWui:
42709467b48Spatrick     return AArch64::STRWpre;
42809467b48Spatrick   case AArch64::STRXui:
42909467b48Spatrick     return AArch64::STRXpre;
43009467b48Spatrick   case AArch64::LDRSui:
43109467b48Spatrick     return AArch64::LDRSpre;
43209467b48Spatrick   case AArch64::LDRDui:
43309467b48Spatrick     return AArch64::LDRDpre;
43409467b48Spatrick   case AArch64::LDRQui:
43509467b48Spatrick     return AArch64::LDRQpre;
43609467b48Spatrick   case AArch64::LDRBBui:
43709467b48Spatrick     return AArch64::LDRBBpre;
43809467b48Spatrick   case AArch64::LDRHHui:
43909467b48Spatrick     return AArch64::LDRHHpre;
44009467b48Spatrick   case AArch64::LDRWui:
44109467b48Spatrick     return AArch64::LDRWpre;
44209467b48Spatrick   case AArch64::LDRXui:
44309467b48Spatrick     return AArch64::LDRXpre;
44409467b48Spatrick   case AArch64::LDRSWui:
44509467b48Spatrick     return AArch64::LDRSWpre;
44609467b48Spatrick   case AArch64::LDPSi:
44709467b48Spatrick     return AArch64::LDPSpre;
44809467b48Spatrick   case AArch64::LDPSWi:
44909467b48Spatrick     return AArch64::LDPSWpre;
45009467b48Spatrick   case AArch64::LDPDi:
45109467b48Spatrick     return AArch64::LDPDpre;
45209467b48Spatrick   case AArch64::LDPQi:
45309467b48Spatrick     return AArch64::LDPQpre;
45409467b48Spatrick   case AArch64::LDPWi:
45509467b48Spatrick     return AArch64::LDPWpre;
45609467b48Spatrick   case AArch64::LDPXi:
45709467b48Spatrick     return AArch64::LDPXpre;
45809467b48Spatrick   case AArch64::STPSi:
45909467b48Spatrick     return AArch64::STPSpre;
46009467b48Spatrick   case AArch64::STPDi:
46109467b48Spatrick     return AArch64::STPDpre;
46209467b48Spatrick   case AArch64::STPQi:
46309467b48Spatrick     return AArch64::STPQpre;
46409467b48Spatrick   case AArch64::STPWi:
46509467b48Spatrick     return AArch64::STPWpre;
46609467b48Spatrick   case AArch64::STPXi:
46709467b48Spatrick     return AArch64::STPXpre;
46809467b48Spatrick   case AArch64::STGOffset:
46909467b48Spatrick     return AArch64::STGPreIndex;
47009467b48Spatrick   case AArch64::STZGOffset:
47109467b48Spatrick     return AArch64::STZGPreIndex;
47209467b48Spatrick   case AArch64::ST2GOffset:
47309467b48Spatrick     return AArch64::ST2GPreIndex;
47409467b48Spatrick   case AArch64::STZ2GOffset:
47509467b48Spatrick     return AArch64::STZ2GPreIndex;
47609467b48Spatrick   case AArch64::STGPi:
47709467b48Spatrick     return AArch64::STGPpre;
47809467b48Spatrick   }
47909467b48Spatrick }
48009467b48Spatrick 
getPostIndexedOpcode(unsigned Opc)48109467b48Spatrick static unsigned getPostIndexedOpcode(unsigned Opc) {
48209467b48Spatrick   switch (Opc) {
48309467b48Spatrick   default:
48409467b48Spatrick     llvm_unreachable("Opcode has no post-indexed wise equivalent!");
48509467b48Spatrick   case AArch64::STRSui:
48609467b48Spatrick   case AArch64::STURSi:
48709467b48Spatrick     return AArch64::STRSpost;
48809467b48Spatrick   case AArch64::STRDui:
48909467b48Spatrick   case AArch64::STURDi:
49009467b48Spatrick     return AArch64::STRDpost;
49109467b48Spatrick   case AArch64::STRQui:
49209467b48Spatrick   case AArch64::STURQi:
49309467b48Spatrick     return AArch64::STRQpost;
49409467b48Spatrick   case AArch64::STRBBui:
49509467b48Spatrick     return AArch64::STRBBpost;
49609467b48Spatrick   case AArch64::STRHHui:
49709467b48Spatrick     return AArch64::STRHHpost;
49809467b48Spatrick   case AArch64::STRWui:
49909467b48Spatrick   case AArch64::STURWi:
50009467b48Spatrick     return AArch64::STRWpost;
50109467b48Spatrick   case AArch64::STRXui:
50209467b48Spatrick   case AArch64::STURXi:
50309467b48Spatrick     return AArch64::STRXpost;
50409467b48Spatrick   case AArch64::LDRSui:
50509467b48Spatrick   case AArch64::LDURSi:
50609467b48Spatrick     return AArch64::LDRSpost;
50709467b48Spatrick   case AArch64::LDRDui:
50809467b48Spatrick   case AArch64::LDURDi:
50909467b48Spatrick     return AArch64::LDRDpost;
51009467b48Spatrick   case AArch64::LDRQui:
51109467b48Spatrick   case AArch64::LDURQi:
51209467b48Spatrick     return AArch64::LDRQpost;
51309467b48Spatrick   case AArch64::LDRBBui:
51409467b48Spatrick     return AArch64::LDRBBpost;
51509467b48Spatrick   case AArch64::LDRHHui:
51609467b48Spatrick     return AArch64::LDRHHpost;
51709467b48Spatrick   case AArch64::LDRWui:
51809467b48Spatrick   case AArch64::LDURWi:
51909467b48Spatrick     return AArch64::LDRWpost;
52009467b48Spatrick   case AArch64::LDRXui:
52109467b48Spatrick   case AArch64::LDURXi:
52209467b48Spatrick     return AArch64::LDRXpost;
52309467b48Spatrick   case AArch64::LDRSWui:
52409467b48Spatrick     return AArch64::LDRSWpost;
52509467b48Spatrick   case AArch64::LDPSi:
52609467b48Spatrick     return AArch64::LDPSpost;
52709467b48Spatrick   case AArch64::LDPSWi:
52809467b48Spatrick     return AArch64::LDPSWpost;
52909467b48Spatrick   case AArch64::LDPDi:
53009467b48Spatrick     return AArch64::LDPDpost;
53109467b48Spatrick   case AArch64::LDPQi:
53209467b48Spatrick     return AArch64::LDPQpost;
53309467b48Spatrick   case AArch64::LDPWi:
53409467b48Spatrick     return AArch64::LDPWpost;
53509467b48Spatrick   case AArch64::LDPXi:
53609467b48Spatrick     return AArch64::LDPXpost;
53709467b48Spatrick   case AArch64::STPSi:
53809467b48Spatrick     return AArch64::STPSpost;
53909467b48Spatrick   case AArch64::STPDi:
54009467b48Spatrick     return AArch64::STPDpost;
54109467b48Spatrick   case AArch64::STPQi:
54209467b48Spatrick     return AArch64::STPQpost;
54309467b48Spatrick   case AArch64::STPWi:
54409467b48Spatrick     return AArch64::STPWpost;
54509467b48Spatrick   case AArch64::STPXi:
54609467b48Spatrick     return AArch64::STPXpost;
54709467b48Spatrick   case AArch64::STGOffset:
54809467b48Spatrick     return AArch64::STGPostIndex;
54909467b48Spatrick   case AArch64::STZGOffset:
55009467b48Spatrick     return AArch64::STZGPostIndex;
55109467b48Spatrick   case AArch64::ST2GOffset:
55209467b48Spatrick     return AArch64::ST2GPostIndex;
55309467b48Spatrick   case AArch64::STZ2GOffset:
55409467b48Spatrick     return AArch64::STZ2GPostIndex;
55509467b48Spatrick   case AArch64::STGPi:
55609467b48Spatrick     return AArch64::STGPpost;
55709467b48Spatrick   }
55809467b48Spatrick }
55909467b48Spatrick 
isPreLdStPairCandidate(MachineInstr & FirstMI,MachineInstr & MI)56073471bf0Spatrick static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
56173471bf0Spatrick 
56273471bf0Spatrick   unsigned OpcA = FirstMI.getOpcode();
56373471bf0Spatrick   unsigned OpcB = MI.getOpcode();
56473471bf0Spatrick 
56573471bf0Spatrick   switch (OpcA) {
56673471bf0Spatrick   default:
56773471bf0Spatrick     return false;
56873471bf0Spatrick   case AArch64::STRSpre:
56973471bf0Spatrick     return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
57073471bf0Spatrick   case AArch64::STRDpre:
57173471bf0Spatrick     return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
57273471bf0Spatrick   case AArch64::STRQpre:
57373471bf0Spatrick     return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
57473471bf0Spatrick   case AArch64::STRWpre:
57573471bf0Spatrick     return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
57673471bf0Spatrick   case AArch64::STRXpre:
57773471bf0Spatrick     return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
57873471bf0Spatrick   case AArch64::LDRSpre:
57973471bf0Spatrick     return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
58073471bf0Spatrick   case AArch64::LDRDpre:
58173471bf0Spatrick     return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
58273471bf0Spatrick   case AArch64::LDRQpre:
58373471bf0Spatrick     return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
58473471bf0Spatrick   case AArch64::LDRWpre:
58573471bf0Spatrick     return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
58673471bf0Spatrick   case AArch64::LDRXpre:
58773471bf0Spatrick     return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
58873471bf0Spatrick   }
58973471bf0Spatrick }
59073471bf0Spatrick 
59109467b48Spatrick // Returns the scale and offset range of pre/post indexed variants of MI.
getPrePostIndexedMemOpInfo(const MachineInstr & MI,int & Scale,int & MinOffset,int & MaxOffset)59209467b48Spatrick static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
59309467b48Spatrick                                        int &MinOffset, int &MaxOffset) {
594*d415bd75Srobert   bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
59509467b48Spatrick   bool IsTagStore = isTagStore(MI);
59609467b48Spatrick   // ST*G and all paired ldst have the same scale in pre/post-indexed variants
59709467b48Spatrick   // as in the "unsigned offset" variant.
59809467b48Spatrick   // All other pre/post indexed ldst instructions are unscaled.
59909467b48Spatrick   Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
60009467b48Spatrick 
60109467b48Spatrick   if (IsPaired) {
60209467b48Spatrick     MinOffset = -64;
60309467b48Spatrick     MaxOffset = 63;
60409467b48Spatrick   } else {
60509467b48Spatrick     MinOffset = -256;
60609467b48Spatrick     MaxOffset = 255;
60709467b48Spatrick   }
60809467b48Spatrick }
60909467b48Spatrick 
getLdStRegOp(MachineInstr & MI,unsigned PairedRegOp=0)61009467b48Spatrick static MachineOperand &getLdStRegOp(MachineInstr &MI,
61109467b48Spatrick                                     unsigned PairedRegOp = 0) {
61209467b48Spatrick   assert(PairedRegOp < 2 && "Unexpected register operand idx.");
61373471bf0Spatrick   bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
61473471bf0Spatrick   if (IsPreLdSt)
61573471bf0Spatrick     PairedRegOp += 1;
616*d415bd75Srobert   unsigned Idx =
617*d415bd75Srobert       AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
61809467b48Spatrick   return MI.getOperand(Idx);
61909467b48Spatrick }
62009467b48Spatrick 
isLdOffsetInRangeOfSt(MachineInstr & LoadInst,MachineInstr & StoreInst,const AArch64InstrInfo * TII)62109467b48Spatrick static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
62209467b48Spatrick                                   MachineInstr &StoreInst,
62309467b48Spatrick                                   const AArch64InstrInfo *TII) {
62409467b48Spatrick   assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
62509467b48Spatrick   int LoadSize = TII->getMemScale(LoadInst);
62609467b48Spatrick   int StoreSize = TII->getMemScale(StoreInst);
627*d415bd75Srobert   int UnscaledStOffset =
628*d415bd75Srobert       TII->hasUnscaledLdStOffset(StoreInst)
629*d415bd75Srobert           ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
630*d415bd75Srobert           : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
631*d415bd75Srobert   int UnscaledLdOffset =
632*d415bd75Srobert       TII->hasUnscaledLdStOffset(LoadInst)
633*d415bd75Srobert           ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
634*d415bd75Srobert           : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
63509467b48Spatrick   return (UnscaledStOffset <= UnscaledLdOffset) &&
63609467b48Spatrick          (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
63709467b48Spatrick }
63809467b48Spatrick 
isPromotableZeroStoreInst(MachineInstr & MI)63909467b48Spatrick static bool isPromotableZeroStoreInst(MachineInstr &MI) {
64009467b48Spatrick   unsigned Opc = MI.getOpcode();
64109467b48Spatrick   return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
64209467b48Spatrick           isNarrowStore(Opc)) &&
64309467b48Spatrick          getLdStRegOp(MI).getReg() == AArch64::WZR;
64409467b48Spatrick }
64509467b48Spatrick 
isPromotableLoadFromStore(MachineInstr & MI)64609467b48Spatrick static bool isPromotableLoadFromStore(MachineInstr &MI) {
64709467b48Spatrick   switch (MI.getOpcode()) {
64809467b48Spatrick   default:
64909467b48Spatrick     return false;
65009467b48Spatrick   // Scaled instructions.
65109467b48Spatrick   case AArch64::LDRBBui:
65209467b48Spatrick   case AArch64::LDRHHui:
65309467b48Spatrick   case AArch64::LDRWui:
65409467b48Spatrick   case AArch64::LDRXui:
65509467b48Spatrick   // Unscaled instructions.
65609467b48Spatrick   case AArch64::LDURBBi:
65709467b48Spatrick   case AArch64::LDURHHi:
65809467b48Spatrick   case AArch64::LDURWi:
65909467b48Spatrick   case AArch64::LDURXi:
66009467b48Spatrick     return true;
66109467b48Spatrick   }
66209467b48Spatrick }
66309467b48Spatrick 
isMergeableLdStUpdate(MachineInstr & MI)66409467b48Spatrick static bool isMergeableLdStUpdate(MachineInstr &MI) {
66509467b48Spatrick   unsigned Opc = MI.getOpcode();
66609467b48Spatrick   switch (Opc) {
66709467b48Spatrick   default:
66809467b48Spatrick     return false;
66909467b48Spatrick   // Scaled instructions.
67009467b48Spatrick   case AArch64::STRSui:
67109467b48Spatrick   case AArch64::STRDui:
67209467b48Spatrick   case AArch64::STRQui:
67309467b48Spatrick   case AArch64::STRXui:
67409467b48Spatrick   case AArch64::STRWui:
67509467b48Spatrick   case AArch64::STRHHui:
67609467b48Spatrick   case AArch64::STRBBui:
67709467b48Spatrick   case AArch64::LDRSui:
67809467b48Spatrick   case AArch64::LDRDui:
67909467b48Spatrick   case AArch64::LDRQui:
68009467b48Spatrick   case AArch64::LDRXui:
68109467b48Spatrick   case AArch64::LDRWui:
68209467b48Spatrick   case AArch64::LDRHHui:
68309467b48Spatrick   case AArch64::LDRBBui:
68409467b48Spatrick   case AArch64::STGOffset:
68509467b48Spatrick   case AArch64::STZGOffset:
68609467b48Spatrick   case AArch64::ST2GOffset:
68709467b48Spatrick   case AArch64::STZ2GOffset:
68809467b48Spatrick   case AArch64::STGPi:
68909467b48Spatrick   // Unscaled instructions.
69009467b48Spatrick   case AArch64::STURSi:
69109467b48Spatrick   case AArch64::STURDi:
69209467b48Spatrick   case AArch64::STURQi:
69309467b48Spatrick   case AArch64::STURWi:
69409467b48Spatrick   case AArch64::STURXi:
69509467b48Spatrick   case AArch64::LDURSi:
69609467b48Spatrick   case AArch64::LDURDi:
69709467b48Spatrick   case AArch64::LDURQi:
69809467b48Spatrick   case AArch64::LDURWi:
69909467b48Spatrick   case AArch64::LDURXi:
70009467b48Spatrick   // Paired instructions.
70109467b48Spatrick   case AArch64::LDPSi:
70209467b48Spatrick   case AArch64::LDPSWi:
70309467b48Spatrick   case AArch64::LDPDi:
70409467b48Spatrick   case AArch64::LDPQi:
70509467b48Spatrick   case AArch64::LDPWi:
70609467b48Spatrick   case AArch64::LDPXi:
70709467b48Spatrick   case AArch64::STPSi:
70809467b48Spatrick   case AArch64::STPDi:
70909467b48Spatrick   case AArch64::STPQi:
71009467b48Spatrick   case AArch64::STPWi:
71109467b48Spatrick   case AArch64::STPXi:
71209467b48Spatrick     // Make sure this is a reg+imm (as opposed to an address reloc).
713*d415bd75Srobert     if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
71409467b48Spatrick       return false;
71509467b48Spatrick 
71609467b48Spatrick     return true;
71709467b48Spatrick   }
71809467b48Spatrick }
71909467b48Spatrick 
72009467b48Spatrick MachineBasicBlock::iterator
mergeNarrowZeroStores(MachineBasicBlock::iterator I,MachineBasicBlock::iterator MergeMI,const LdStPairFlags & Flags)72109467b48Spatrick AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
72209467b48Spatrick                                            MachineBasicBlock::iterator MergeMI,
72309467b48Spatrick                                            const LdStPairFlags &Flags) {
72409467b48Spatrick   assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
72509467b48Spatrick          "Expected promotable zero stores.");
72609467b48Spatrick 
727097a140dSpatrick   MachineBasicBlock::iterator E = I->getParent()->end();
728097a140dSpatrick   MachineBasicBlock::iterator NextI = next_nodbg(I, E);
72909467b48Spatrick   // If NextI is the second of the two instructions to be merged, we need
73009467b48Spatrick   // to skip one further. Either way we merge will invalidate the iterator,
73109467b48Spatrick   // and we don't need to scan the new instruction, as it's a pairwise
73209467b48Spatrick   // instruction, which we're not considering for further action anyway.
73309467b48Spatrick   if (NextI == MergeMI)
734097a140dSpatrick     NextI = next_nodbg(NextI, E);
73509467b48Spatrick 
73609467b48Spatrick   unsigned Opc = I->getOpcode();
73773471bf0Spatrick   bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
73809467b48Spatrick   int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
73909467b48Spatrick 
74009467b48Spatrick   bool MergeForward = Flags.getMergeForward();
74109467b48Spatrick   // Insert our new paired instruction after whichever of the paired
74209467b48Spatrick   // instructions MergeForward indicates.
74309467b48Spatrick   MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
74409467b48Spatrick   // Also based on MergeForward is from where we copy the base register operand
74509467b48Spatrick   // so we get the flags compatible with the input code.
74609467b48Spatrick   const MachineOperand &BaseRegOp =
747*d415bd75Srobert       MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
748*d415bd75Srobert                    : AArch64InstrInfo::getLdStBaseOp(*I);
74909467b48Spatrick 
75009467b48Spatrick   // Which register is Rt and which is Rt2 depends on the offset order.
75109467b48Spatrick   MachineInstr *RtMI;
752*d415bd75Srobert   if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
753*d415bd75Srobert       AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
75409467b48Spatrick     RtMI = &*MergeMI;
75509467b48Spatrick   else
75609467b48Spatrick     RtMI = &*I;
75709467b48Spatrick 
758*d415bd75Srobert   int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
75909467b48Spatrick   // Change the scaled offset from small to large type.
76009467b48Spatrick   if (IsScaled) {
76109467b48Spatrick     assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
76209467b48Spatrick     OffsetImm /= 2;
76309467b48Spatrick   }
76409467b48Spatrick 
76509467b48Spatrick   // Construct the new instruction.
76609467b48Spatrick   DebugLoc DL = I->getDebugLoc();
76709467b48Spatrick   MachineBasicBlock *MBB = I->getParent();
76809467b48Spatrick   MachineInstrBuilder MIB;
76909467b48Spatrick   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
77009467b48Spatrick             .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
77109467b48Spatrick             .add(BaseRegOp)
77209467b48Spatrick             .addImm(OffsetImm)
77309467b48Spatrick             .cloneMergedMemRefs({&*I, &*MergeMI})
77409467b48Spatrick             .setMIFlags(I->mergeFlagsWith(*MergeMI));
77509467b48Spatrick   (void)MIB;
77609467b48Spatrick 
77709467b48Spatrick   LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
77809467b48Spatrick   LLVM_DEBUG(I->print(dbgs()));
77909467b48Spatrick   LLVM_DEBUG(dbgs() << "    ");
78009467b48Spatrick   LLVM_DEBUG(MergeMI->print(dbgs()));
78109467b48Spatrick   LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
78209467b48Spatrick   LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
78309467b48Spatrick   LLVM_DEBUG(dbgs() << "\n");
78409467b48Spatrick 
78509467b48Spatrick   // Erase the old instructions.
78609467b48Spatrick   I->eraseFromParent();
78709467b48Spatrick   MergeMI->eraseFromParent();
78809467b48Spatrick   return NextI;
78909467b48Spatrick }
79009467b48Spatrick 
79109467b48Spatrick // Apply Fn to all instructions between MI and the beginning of the block, until
79209467b48Spatrick // a def for DefReg is reached. Returns true, iff Fn returns true for all
79309467b48Spatrick // visited instructions. Stop after visiting Limit iterations.
forAllMIsUntilDef(MachineInstr & MI,MCPhysReg DefReg,const TargetRegisterInfo * TRI,unsigned Limit,std::function<bool (MachineInstr &,bool)> & Fn)79409467b48Spatrick static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
79509467b48Spatrick                               const TargetRegisterInfo *TRI, unsigned Limit,
79609467b48Spatrick                               std::function<bool(MachineInstr &, bool)> &Fn) {
79709467b48Spatrick   auto MBB = MI.getParent();
798097a140dSpatrick   for (MachineInstr &I :
799097a140dSpatrick        instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
80009467b48Spatrick     if (!Limit)
80109467b48Spatrick       return false;
80209467b48Spatrick     --Limit;
80309467b48Spatrick 
804097a140dSpatrick     bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
80509467b48Spatrick       return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
80609467b48Spatrick              TRI->regsOverlap(MOP.getReg(), DefReg);
80709467b48Spatrick     });
808097a140dSpatrick     if (!Fn(I, isDef))
80909467b48Spatrick       return false;
81009467b48Spatrick     if (isDef)
81109467b48Spatrick       break;
81209467b48Spatrick   }
81309467b48Spatrick   return true;
81409467b48Spatrick }
81509467b48Spatrick 
updateDefinedRegisters(MachineInstr & MI,LiveRegUnits & Units,const TargetRegisterInfo * TRI)81609467b48Spatrick static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
81709467b48Spatrick                                    const TargetRegisterInfo *TRI) {
81809467b48Spatrick 
81909467b48Spatrick   for (const MachineOperand &MOP : phys_regs_and_masks(MI))
82009467b48Spatrick     if (MOP.isReg() && MOP.isKill())
82109467b48Spatrick       Units.removeReg(MOP.getReg());
82209467b48Spatrick 
82309467b48Spatrick   for (const MachineOperand &MOP : phys_regs_and_masks(MI))
82409467b48Spatrick     if (MOP.isReg() && !MOP.isKill())
82509467b48Spatrick       Units.addReg(MOP.getReg());
82609467b48Spatrick }
82709467b48Spatrick 
82809467b48Spatrick MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,MachineBasicBlock::iterator Paired,const LdStPairFlags & Flags)82909467b48Spatrick AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
83009467b48Spatrick                                       MachineBasicBlock::iterator Paired,
83109467b48Spatrick                                       const LdStPairFlags &Flags) {
832097a140dSpatrick   MachineBasicBlock::iterator E = I->getParent()->end();
833097a140dSpatrick   MachineBasicBlock::iterator NextI = next_nodbg(I, E);
83409467b48Spatrick   // If NextI is the second of the two instructions to be merged, we need
83509467b48Spatrick   // to skip one further. Either way we merge will invalidate the iterator,
83609467b48Spatrick   // and we don't need to scan the new instruction, as it's a pairwise
83709467b48Spatrick   // instruction, which we're not considering for further action anyway.
83809467b48Spatrick   if (NextI == Paired)
839097a140dSpatrick     NextI = next_nodbg(NextI, E);
84009467b48Spatrick 
84109467b48Spatrick   int SExtIdx = Flags.getSExtIdx();
84209467b48Spatrick   unsigned Opc =
84309467b48Spatrick       SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
84473471bf0Spatrick   bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
84509467b48Spatrick   int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
84609467b48Spatrick 
84709467b48Spatrick   bool MergeForward = Flags.getMergeForward();
84809467b48Spatrick 
849*d415bd75Srobert   std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
85009467b48Spatrick   if (MergeForward && RenameReg) {
85109467b48Spatrick     MCRegister RegToRename = getLdStRegOp(*I).getReg();
85209467b48Spatrick     DefinedInBB.addReg(*RenameReg);
85309467b48Spatrick 
85409467b48Spatrick     // Return the sub/super register for RenameReg, matching the size of
85509467b48Spatrick     // OriginalReg.
85609467b48Spatrick     auto GetMatchingSubReg = [this,
85709467b48Spatrick                               RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
85809467b48Spatrick       for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
85909467b48Spatrick         if (TRI->getMinimalPhysRegClass(OriginalReg) ==
86009467b48Spatrick             TRI->getMinimalPhysRegClass(SubOrSuper))
86109467b48Spatrick           return SubOrSuper;
86209467b48Spatrick       llvm_unreachable("Should have found matching sub or super register!");
86309467b48Spatrick     };
86409467b48Spatrick 
86509467b48Spatrick     std::function<bool(MachineInstr &, bool)> UpdateMIs =
86609467b48Spatrick         [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
86709467b48Spatrick           if (IsDef) {
86809467b48Spatrick             bool SeenDef = false;
86909467b48Spatrick             for (auto &MOP : MI.operands()) {
87009467b48Spatrick               // Rename the first explicit definition and all implicit
87109467b48Spatrick               // definitions matching RegToRename.
87209467b48Spatrick               if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
87309467b48Spatrick                   (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
87409467b48Spatrick                   TRI->regsOverlap(MOP.getReg(), RegToRename)) {
87509467b48Spatrick                 assert((MOP.isImplicit() ||
87609467b48Spatrick                         (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
87709467b48Spatrick                        "Need renamable operands");
87809467b48Spatrick                 MOP.setReg(GetMatchingSubReg(MOP.getReg()));
87909467b48Spatrick                 SeenDef = true;
88009467b48Spatrick               }
88109467b48Spatrick             }
88209467b48Spatrick           } else {
88309467b48Spatrick             for (auto &MOP : MI.operands()) {
88409467b48Spatrick               if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
88509467b48Spatrick                   TRI->regsOverlap(MOP.getReg(), RegToRename)) {
88609467b48Spatrick                 assert((MOP.isImplicit() ||
88709467b48Spatrick                         (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
88809467b48Spatrick                            "Need renamable operands");
88909467b48Spatrick                 MOP.setReg(GetMatchingSubReg(MOP.getReg()));
89009467b48Spatrick               }
89109467b48Spatrick             }
89209467b48Spatrick           }
89309467b48Spatrick           LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
89409467b48Spatrick           return true;
89509467b48Spatrick         };
89609467b48Spatrick     forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);
89709467b48Spatrick 
89809467b48Spatrick #if !defined(NDEBUG)
89909467b48Spatrick     // Make sure the register used for renaming is not used between the paired
90009467b48Spatrick     // instructions. That would trash the content before the new paired
90109467b48Spatrick     // instruction.
90209467b48Spatrick     for (auto &MI :
90309467b48Spatrick          iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
90409467b48Spatrick              std::next(I), std::next(Paired)))
90509467b48Spatrick       assert(all_of(MI.operands(),
90609467b48Spatrick                     [this, &RenameReg](const MachineOperand &MOP) {
90709467b48Spatrick                       return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
908*d415bd75Srobert                              MOP.isUndef() ||
90909467b48Spatrick                              !TRI->regsOverlap(MOP.getReg(), *RenameReg);
91009467b48Spatrick                     }) &&
91109467b48Spatrick              "Rename register used between paired instruction, trashing the "
91209467b48Spatrick              "content");
91309467b48Spatrick #endif
91409467b48Spatrick   }
91509467b48Spatrick 
91609467b48Spatrick   // Insert our new paired instruction after whichever of the paired
91709467b48Spatrick   // instructions MergeForward indicates.
91809467b48Spatrick   MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
91909467b48Spatrick   // Also based on MergeForward is from where we copy the base register operand
92009467b48Spatrick   // so we get the flags compatible with the input code.
92109467b48Spatrick   const MachineOperand &BaseRegOp =
922*d415bd75Srobert       MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
923*d415bd75Srobert                    : AArch64InstrInfo::getLdStBaseOp(*I);
92409467b48Spatrick 
925*d415bd75Srobert   int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
926*d415bd75Srobert   int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
92773471bf0Spatrick   bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
92809467b48Spatrick   if (IsUnscaled != PairedIsUnscaled) {
92909467b48Spatrick     // We're trying to pair instructions that differ in how they are scaled.  If
93009467b48Spatrick     // I is scaled then scale the offset of Paired accordingly.  Otherwise, do
93109467b48Spatrick     // the opposite (i.e., make Paired's offset unscaled).
93209467b48Spatrick     int MemSize = TII->getMemScale(*Paired);
93309467b48Spatrick     if (PairedIsUnscaled) {
93409467b48Spatrick       // If the unscaled offset isn't a multiple of the MemSize, we can't
93509467b48Spatrick       // pair the operations together.
93609467b48Spatrick       assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
93709467b48Spatrick              "Offset should be a multiple of the stride!");
93809467b48Spatrick       PairedOffset /= MemSize;
93909467b48Spatrick     } else {
94009467b48Spatrick       PairedOffset *= MemSize;
94109467b48Spatrick     }
94209467b48Spatrick   }
94309467b48Spatrick 
94409467b48Spatrick   // Which register is Rt and which is Rt2 depends on the offset order.
94573471bf0Spatrick   // However, for pre load/stores the Rt should be the one of the pre
94673471bf0Spatrick   // load/store.
94709467b48Spatrick   MachineInstr *RtMI, *Rt2MI;
94873471bf0Spatrick   if (Offset == PairedOffset + OffsetStride &&
94973471bf0Spatrick       !AArch64InstrInfo::isPreLdSt(*I)) {
95009467b48Spatrick     RtMI = &*Paired;
95109467b48Spatrick     Rt2MI = &*I;
95209467b48Spatrick     // Here we swapped the assumption made for SExtIdx.
95309467b48Spatrick     // I.e., we turn ldp I, Paired into ldp Paired, I.
95409467b48Spatrick     // Update the index accordingly.
95509467b48Spatrick     if (SExtIdx != -1)
95609467b48Spatrick       SExtIdx = (SExtIdx + 1) % 2;
95709467b48Spatrick   } else {
95809467b48Spatrick     RtMI = &*I;
95909467b48Spatrick     Rt2MI = &*Paired;
96009467b48Spatrick   }
961*d415bd75Srobert   int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
96209467b48Spatrick   // Scale the immediate offset, if necessary.
96373471bf0Spatrick   if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
96409467b48Spatrick     assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
96509467b48Spatrick            "Unscaled offset cannot be scaled.");
96609467b48Spatrick     OffsetImm /= TII->getMemScale(*RtMI);
96709467b48Spatrick   }
96809467b48Spatrick 
96909467b48Spatrick   // Construct the new instruction.
97009467b48Spatrick   MachineInstrBuilder MIB;
97109467b48Spatrick   DebugLoc DL = I->getDebugLoc();
97209467b48Spatrick   MachineBasicBlock *MBB = I->getParent();
97309467b48Spatrick   MachineOperand RegOp0 = getLdStRegOp(*RtMI);
97409467b48Spatrick   MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
97509467b48Spatrick   // Kill flags may become invalid when moving stores for pairing.
97609467b48Spatrick   if (RegOp0.isUse()) {
97709467b48Spatrick     if (!MergeForward) {
97809467b48Spatrick       // Clear kill flags on store if moving upwards. Example:
97909467b48Spatrick       //   STRWui %w0, ...
98009467b48Spatrick       //   USE %w1
98109467b48Spatrick       //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
98209467b48Spatrick       RegOp0.setIsKill(false);
98309467b48Spatrick       RegOp1.setIsKill(false);
98409467b48Spatrick     } else {
98509467b48Spatrick       // Clear kill flags of the first stores register. Example:
98609467b48Spatrick       //   STRWui %w1, ...
98709467b48Spatrick       //   USE kill %w1   ; need to clear kill flag when moving STRWui downwards
98809467b48Spatrick       //   STRW %w0
98909467b48Spatrick       Register Reg = getLdStRegOp(*I).getReg();
99009467b48Spatrick       for (MachineInstr &MI : make_range(std::next(I), Paired))
99109467b48Spatrick         MI.clearRegisterKills(Reg, TRI);
99209467b48Spatrick     }
99309467b48Spatrick   }
99473471bf0Spatrick 
99573471bf0Spatrick   unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
99673471bf0Spatrick   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
99773471bf0Spatrick 
99873471bf0Spatrick   // Adds the pre-index operand for pre-indexed ld/st pairs.
99973471bf0Spatrick   if (AArch64InstrInfo::isPreLdSt(*RtMI))
100073471bf0Spatrick     MIB.addReg(BaseRegOp.getReg(), RegState::Define);
100173471bf0Spatrick 
100273471bf0Spatrick   MIB.add(RegOp0)
100309467b48Spatrick       .add(RegOp1)
100409467b48Spatrick       .add(BaseRegOp)
100509467b48Spatrick       .addImm(OffsetImm)
100609467b48Spatrick       .cloneMergedMemRefs({&*I, &*Paired})
100709467b48Spatrick       .setMIFlags(I->mergeFlagsWith(*Paired));
100809467b48Spatrick 
100909467b48Spatrick   (void)MIB;
101009467b48Spatrick 
101109467b48Spatrick   LLVM_DEBUG(
101209467b48Spatrick       dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
101309467b48Spatrick   LLVM_DEBUG(I->print(dbgs()));
101409467b48Spatrick   LLVM_DEBUG(dbgs() << "    ");
101509467b48Spatrick   LLVM_DEBUG(Paired->print(dbgs()));
101609467b48Spatrick   LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
101709467b48Spatrick   if (SExtIdx != -1) {
101809467b48Spatrick     // Generate the sign extension for the proper result of the ldp.
101909467b48Spatrick     // I.e., with X1, that would be:
102009467b48Spatrick     // %w1 = KILL %w1, implicit-def %x1
102109467b48Spatrick     // %x1 = SBFMXri killed %x1, 0, 31
102209467b48Spatrick     MachineOperand &DstMO = MIB->getOperand(SExtIdx);
102309467b48Spatrick     // Right now, DstMO has the extended register, since it comes from an
102409467b48Spatrick     // extended opcode.
102509467b48Spatrick     Register DstRegX = DstMO.getReg();
102609467b48Spatrick     // Get the W variant of that register.
102709467b48Spatrick     Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
102809467b48Spatrick     // Update the result of LDP to use the W instead of the X variant.
102909467b48Spatrick     DstMO.setReg(DstRegW);
103009467b48Spatrick     LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
103109467b48Spatrick     LLVM_DEBUG(dbgs() << "\n");
103209467b48Spatrick     // Make the machine verifier happy by providing a definition for
103309467b48Spatrick     // the X register.
103409467b48Spatrick     // Insert this definition right after the generated LDP, i.e., before
103509467b48Spatrick     // InsertionPoint.
103609467b48Spatrick     MachineInstrBuilder MIBKill =
103709467b48Spatrick         BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
103809467b48Spatrick             .addReg(DstRegW)
103909467b48Spatrick             .addReg(DstRegX, RegState::Define);
104009467b48Spatrick     MIBKill->getOperand(2).setImplicit();
104109467b48Spatrick     // Create the sign extension.
104209467b48Spatrick     MachineInstrBuilder MIBSXTW =
104309467b48Spatrick         BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
104409467b48Spatrick             .addReg(DstRegX)
104509467b48Spatrick             .addImm(0)
104609467b48Spatrick             .addImm(31);
104709467b48Spatrick     (void)MIBSXTW;
104809467b48Spatrick     LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
104909467b48Spatrick     LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
105009467b48Spatrick   } else {
105109467b48Spatrick     LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
105209467b48Spatrick   }
105309467b48Spatrick   LLVM_DEBUG(dbgs() << "\n");
105409467b48Spatrick 
105509467b48Spatrick   if (MergeForward)
105609467b48Spatrick     for (const MachineOperand &MOP : phys_regs_and_masks(*I))
105709467b48Spatrick       if (MOP.isReg() && MOP.isKill())
105809467b48Spatrick         DefinedInBB.addReg(MOP.getReg());
105909467b48Spatrick 
106009467b48Spatrick   // Erase the old instructions.
106109467b48Spatrick   I->eraseFromParent();
106209467b48Spatrick   Paired->eraseFromParent();
106309467b48Spatrick 
106409467b48Spatrick   return NextI;
106509467b48Spatrick }
106609467b48Spatrick 
106709467b48Spatrick MachineBasicBlock::iterator
promoteLoadFromStore(MachineBasicBlock::iterator LoadI,MachineBasicBlock::iterator StoreI)106809467b48Spatrick AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
106909467b48Spatrick                                           MachineBasicBlock::iterator StoreI) {
1070097a140dSpatrick   MachineBasicBlock::iterator NextI =
1071097a140dSpatrick       next_nodbg(LoadI, LoadI->getParent()->end());
107209467b48Spatrick 
107309467b48Spatrick   int LoadSize = TII->getMemScale(*LoadI);
107409467b48Spatrick   int StoreSize = TII->getMemScale(*StoreI);
107509467b48Spatrick   Register LdRt = getLdStRegOp(*LoadI).getReg();
107609467b48Spatrick   const MachineOperand &StMO = getLdStRegOp(*StoreI);
107709467b48Spatrick   Register StRt = getLdStRegOp(*StoreI).getReg();
107809467b48Spatrick   bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
107909467b48Spatrick 
108009467b48Spatrick   assert((IsStoreXReg ||
108109467b48Spatrick           TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
108209467b48Spatrick          "Unexpected RegClass");
108309467b48Spatrick 
108409467b48Spatrick   MachineInstr *BitExtMI;
108509467b48Spatrick   if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
108609467b48Spatrick     // Remove the load, if the destination register of the loads is the same
108709467b48Spatrick     // register for stored value.
108809467b48Spatrick     if (StRt == LdRt && LoadSize == 8) {
108909467b48Spatrick       for (MachineInstr &MI : make_range(StoreI->getIterator(),
109009467b48Spatrick                                          LoadI->getIterator())) {
109109467b48Spatrick         if (MI.killsRegister(StRt, TRI)) {
109209467b48Spatrick           MI.clearRegisterKills(StRt, TRI);
109309467b48Spatrick           break;
109409467b48Spatrick         }
109509467b48Spatrick       }
109609467b48Spatrick       LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
109709467b48Spatrick       LLVM_DEBUG(LoadI->print(dbgs()));
109809467b48Spatrick       LLVM_DEBUG(dbgs() << "\n");
109909467b48Spatrick       LoadI->eraseFromParent();
110009467b48Spatrick       return NextI;
110109467b48Spatrick     }
110209467b48Spatrick     // Replace the load with a mov if the load and store are in the same size.
110309467b48Spatrick     BitExtMI =
110409467b48Spatrick         BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
110509467b48Spatrick                 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
110609467b48Spatrick             .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
110709467b48Spatrick             .add(StMO)
110809467b48Spatrick             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
110909467b48Spatrick             .setMIFlags(LoadI->getFlags());
111009467b48Spatrick   } else {
111109467b48Spatrick     // FIXME: Currently we disable this transformation in big-endian targets as
111209467b48Spatrick     // performance and correctness are verified only in little-endian.
111309467b48Spatrick     if (!Subtarget->isLittleEndian())
111409467b48Spatrick       return NextI;
111573471bf0Spatrick     bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
111673471bf0Spatrick     assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
111709467b48Spatrick            "Unsupported ld/st match");
111809467b48Spatrick     assert(LoadSize <= StoreSize && "Invalid load size");
1119*d415bd75Srobert     int UnscaledLdOffset =
1120*d415bd75Srobert         IsUnscaled
1121*d415bd75Srobert             ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
1122*d415bd75Srobert             : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1123*d415bd75Srobert     int UnscaledStOffset =
1124*d415bd75Srobert         IsUnscaled
1125*d415bd75Srobert             ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
1126*d415bd75Srobert             : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
112709467b48Spatrick     int Width = LoadSize * 8;
1128*d415bd75Srobert     Register DestReg =
112909467b48Spatrick         IsStoreXReg ? Register(TRI->getMatchingSuperReg(
113009467b48Spatrick                           LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
113109467b48Spatrick                     : LdRt;
113209467b48Spatrick 
113309467b48Spatrick     assert((UnscaledLdOffset >= UnscaledStOffset &&
113409467b48Spatrick             (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
113509467b48Spatrick            "Invalid offset");
113609467b48Spatrick 
113709467b48Spatrick     int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
113809467b48Spatrick     int Imms = Immr + Width - 1;
113909467b48Spatrick     if (UnscaledLdOffset == UnscaledStOffset) {
114009467b48Spatrick       uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
114109467b48Spatrick                                 | ((Immr) << 6)               // immr
114209467b48Spatrick                                 | ((Imms) << 0)               // imms
114309467b48Spatrick           ;
114409467b48Spatrick 
114509467b48Spatrick       BitExtMI =
114609467b48Spatrick           BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
114709467b48Spatrick                   TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
114809467b48Spatrick                   DestReg)
114909467b48Spatrick               .add(StMO)
115009467b48Spatrick               .addImm(AndMaskEncoded)
115109467b48Spatrick               .setMIFlags(LoadI->getFlags());
115209467b48Spatrick     } else {
115309467b48Spatrick       BitExtMI =
115409467b48Spatrick           BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
115509467b48Spatrick                   TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
115609467b48Spatrick                   DestReg)
115709467b48Spatrick               .add(StMO)
115809467b48Spatrick               .addImm(Immr)
115909467b48Spatrick               .addImm(Imms)
116009467b48Spatrick               .setMIFlags(LoadI->getFlags());
116109467b48Spatrick     }
116209467b48Spatrick   }
116309467b48Spatrick 
116409467b48Spatrick   // Clear kill flags between store and load.
116509467b48Spatrick   for (MachineInstr &MI : make_range(StoreI->getIterator(),
116609467b48Spatrick                                      BitExtMI->getIterator()))
116709467b48Spatrick     if (MI.killsRegister(StRt, TRI)) {
116809467b48Spatrick       MI.clearRegisterKills(StRt, TRI);
116909467b48Spatrick       break;
117009467b48Spatrick     }
117109467b48Spatrick 
117209467b48Spatrick   LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
117309467b48Spatrick   LLVM_DEBUG(StoreI->print(dbgs()));
117409467b48Spatrick   LLVM_DEBUG(dbgs() << "    ");
117509467b48Spatrick   LLVM_DEBUG(LoadI->print(dbgs()));
117609467b48Spatrick   LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
117709467b48Spatrick   LLVM_DEBUG(StoreI->print(dbgs()));
117809467b48Spatrick   LLVM_DEBUG(dbgs() << "    ");
117909467b48Spatrick   LLVM_DEBUG((BitExtMI)->print(dbgs()));
118009467b48Spatrick   LLVM_DEBUG(dbgs() << "\n");
118109467b48Spatrick 
118209467b48Spatrick   // Erase the old instructions.
118309467b48Spatrick   LoadI->eraseFromParent();
118409467b48Spatrick   return NextI;
118509467b48Spatrick }
118609467b48Spatrick 
// Returns true if \p Offset fits the [-64, 63] element-offset range accepted
// for a paired load/store. For unscaled instructions (\p IsUnscaled) the
// offset is given in bytes and is first converted to elements by dividing by
// \p OffsetStride; \p OffsetStride is not consulted for scaled instructions.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  int ElementOffset = Offset;
  if (IsUnscaled) {
    // A byte offset that is not a whole number of strides has no element
    // equivalent, so it can never be matched.
    if (Offset % OffsetStride != 0)
      return false;
    ElementOffset = Offset / OffsetStride;
  }
  return ElementOffset >= -64 && ElementOffset <= 63;
}
119909467b48Spatrick 
// Round \p Num up to the next multiple of \p PowOf2, which must be a positive
// power of two. This is specialized to signed ints, avoiding having to do a
// C-style cast from uint64_t to int when using alignTo from
// include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  // The mask arithmetic below is only correct for power-of-two alignments;
  // any other value would silently yield a wrong result.
  assert(PowOf2 > 0 && (PowOf2 & (PowOf2 - 1)) == 0 &&
         "Alignment must be a positive power of two");
  const int Mask = PowOf2 - 1;
  return (Num + Mask) & ~Mask;
}
120709467b48Spatrick 
mayAlias(MachineInstr & MIa,SmallVectorImpl<MachineInstr * > & MemInsns,AliasAnalysis * AA)120809467b48Spatrick static bool mayAlias(MachineInstr &MIa,
120909467b48Spatrick                      SmallVectorImpl<MachineInstr *> &MemInsns,
121009467b48Spatrick                      AliasAnalysis *AA) {
121109467b48Spatrick   for (MachineInstr *MIb : MemInsns)
1212097a140dSpatrick     if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
121309467b48Spatrick       return true;
121409467b48Spatrick 
121509467b48Spatrick   return false;
121609467b48Spatrick }
121709467b48Spatrick 
findMatchingStore(MachineBasicBlock::iterator I,unsigned Limit,MachineBasicBlock::iterator & StoreI)121809467b48Spatrick bool AArch64LoadStoreOpt::findMatchingStore(
121909467b48Spatrick     MachineBasicBlock::iterator I, unsigned Limit,
122009467b48Spatrick     MachineBasicBlock::iterator &StoreI) {
122109467b48Spatrick   MachineBasicBlock::iterator B = I->getParent()->begin();
122209467b48Spatrick   MachineBasicBlock::iterator MBBI = I;
122309467b48Spatrick   MachineInstr &LoadMI = *I;
1224*d415bd75Srobert   Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();
122509467b48Spatrick 
122609467b48Spatrick   // If the load is the first instruction in the block, there's obviously
122709467b48Spatrick   // not any matching store.
122809467b48Spatrick   if (MBBI == B)
122909467b48Spatrick     return false;
123009467b48Spatrick 
123109467b48Spatrick   // Track which register units have been modified and used between the first
123209467b48Spatrick   // insn and the second insn.
123309467b48Spatrick   ModifiedRegUnits.clear();
123409467b48Spatrick   UsedRegUnits.clear();
123509467b48Spatrick 
123609467b48Spatrick   unsigned Count = 0;
123709467b48Spatrick   do {
1238097a140dSpatrick     MBBI = prev_nodbg(MBBI, B);
123909467b48Spatrick     MachineInstr &MI = *MBBI;
124009467b48Spatrick 
124109467b48Spatrick     // Don't count transient instructions towards the search limit since there
124209467b48Spatrick     // may be different numbers of them if e.g. debug information is present.
124309467b48Spatrick     if (!MI.isTransient())
124409467b48Spatrick       ++Count;
124509467b48Spatrick 
124609467b48Spatrick     // If the load instruction reads directly from the address to which the
124709467b48Spatrick     // store instruction writes and the stored value is not modified, we can
124809467b48Spatrick     // promote the load. Since we do not handle stores with pre-/post-index,
124909467b48Spatrick     // it's unnecessary to check if BaseReg is modified by the store itself.
125073471bf0Spatrick     // Also we can't handle stores without an immediate offset operand,
125173471bf0Spatrick     // while the operand might be the address for a global variable.
125209467b48Spatrick     if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1253*d415bd75Srobert         BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
1254*d415bd75Srobert         AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
125509467b48Spatrick         isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
125609467b48Spatrick         ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
125709467b48Spatrick       StoreI = MBBI;
125809467b48Spatrick       return true;
125909467b48Spatrick     }
126009467b48Spatrick 
126109467b48Spatrick     if (MI.isCall())
126209467b48Spatrick       return false;
126309467b48Spatrick 
126409467b48Spatrick     // Update modified / uses register units.
126509467b48Spatrick     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
126609467b48Spatrick 
126709467b48Spatrick     // Otherwise, if the base register is modified, we have no match, so
126809467b48Spatrick     // return early.
126909467b48Spatrick     if (!ModifiedRegUnits.available(BaseReg))
127009467b48Spatrick       return false;
127109467b48Spatrick 
127209467b48Spatrick     // If we encounter a store aliased with the load, return early.
1273097a140dSpatrick     if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
127409467b48Spatrick       return false;
127509467b48Spatrick   } while (MBBI != B && Count < Limit);
127609467b48Spatrick   return false;
127709467b48Spatrick }
127809467b48Spatrick 
needsWinCFI(const MachineFunction * MF)1279*d415bd75Srobert static bool needsWinCFI(const MachineFunction *MF) {
1280*d415bd75Srobert   return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1281*d415bd75Srobert          MF->getFunction().needsUnwindTableEntry();
1282*d415bd75Srobert }
1283*d415bd75Srobert 
128409467b48Spatrick // Returns true if FirstMI and MI are candidates for merging or pairing.
128509467b48Spatrick // Otherwise, returns false.
areCandidatesToMergeOrPair(MachineInstr & FirstMI,MachineInstr & MI,LdStPairFlags & Flags,const AArch64InstrInfo * TII)128609467b48Spatrick static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
128709467b48Spatrick                                        LdStPairFlags &Flags,
128809467b48Spatrick                                        const AArch64InstrInfo *TII) {
128909467b48Spatrick   // If this is volatile or if pairing is suppressed, not a candidate.
129009467b48Spatrick   if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
129109467b48Spatrick     return false;
129209467b48Spatrick 
129309467b48Spatrick   // We should have already checked FirstMI for pair suppression and volatility.
129409467b48Spatrick   assert(!FirstMI.hasOrderedMemoryRef() &&
129509467b48Spatrick          !TII->isLdStPairSuppressed(FirstMI) &&
129609467b48Spatrick          "FirstMI shouldn't get here if either of these checks are true.");
129709467b48Spatrick 
1298*d415bd75Srobert   if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1299*d415bd75Srobert                                   MI.getFlag(MachineInstr::FrameDestroy)))
1300*d415bd75Srobert     return false;
1301*d415bd75Srobert 
130209467b48Spatrick   unsigned OpcA = FirstMI.getOpcode();
130309467b48Spatrick   unsigned OpcB = MI.getOpcode();
130409467b48Spatrick 
130573471bf0Spatrick   // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
130609467b48Spatrick   if (OpcA == OpcB)
130773471bf0Spatrick     return !AArch64InstrInfo::isPreLdSt(FirstMI);
130809467b48Spatrick 
130909467b48Spatrick   // Try to match a sign-extended load/store with a zero-extended load/store.
131009467b48Spatrick   bool IsValidLdStrOpc, PairIsValidLdStrOpc;
131109467b48Spatrick   unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
131209467b48Spatrick   assert(IsValidLdStrOpc &&
131309467b48Spatrick          "Given Opc should be a Load or Store with an immediate");
131409467b48Spatrick   // OpcA will be the first instruction in the pair.
131509467b48Spatrick   if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
131609467b48Spatrick     Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
131709467b48Spatrick     return true;
131809467b48Spatrick   }
131909467b48Spatrick 
132009467b48Spatrick   // If the second instruction isn't even a mergable/pairable load/store, bail
132109467b48Spatrick   // out.
132209467b48Spatrick   if (!PairIsValidLdStrOpc)
132309467b48Spatrick     return false;
132409467b48Spatrick 
132509467b48Spatrick   // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
132609467b48Spatrick   // offsets.
132709467b48Spatrick   if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
132809467b48Spatrick     return false;
132909467b48Spatrick 
133073471bf0Spatrick   // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
133173471bf0Spatrick   // LDR<S,D,Q,W,X>pre-LDR<S,D,Q,W,X>ui
133273471bf0Spatrick   // are candidate pairs that can be merged.
133373471bf0Spatrick   if (isPreLdStPairCandidate(FirstMI, MI))
133473471bf0Spatrick     return true;
133573471bf0Spatrick 
133609467b48Spatrick   // Try to match an unscaled load/store with a scaled load/store.
133773471bf0Spatrick   return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
133809467b48Spatrick          getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
133909467b48Spatrick 
134009467b48Spatrick   // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
134109467b48Spatrick }
134209467b48Spatrick 
134309467b48Spatrick static bool
canRenameUpToDef(MachineInstr & FirstMI,LiveRegUnits & UsedInBetween,SmallPtrSetImpl<const TargetRegisterClass * > & RequiredClasses,const TargetRegisterInfo * TRI)134409467b48Spatrick canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
134509467b48Spatrick                  SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
134609467b48Spatrick                  const TargetRegisterInfo *TRI) {
134709467b48Spatrick   if (!FirstMI.mayStore())
134809467b48Spatrick     return false;
134909467b48Spatrick 
135009467b48Spatrick   // Check if we can find an unused register which we can use to rename
135109467b48Spatrick   // the register used by the first load/store.
135209467b48Spatrick   auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
135309467b48Spatrick   MachineFunction &MF = *FirstMI.getParent()->getParent();
135409467b48Spatrick   if (!RegClass || !MF.getRegInfo().tracksLiveness())
135509467b48Spatrick     return false;
135609467b48Spatrick 
135709467b48Spatrick   auto RegToRename = getLdStRegOp(FirstMI).getReg();
135809467b48Spatrick   // For now, we only rename if the store operand gets killed at the store.
135909467b48Spatrick   if (!getLdStRegOp(FirstMI).isKill() &&
136009467b48Spatrick       !any_of(FirstMI.operands(),
136109467b48Spatrick               [TRI, RegToRename](const MachineOperand &MOP) {
136209467b48Spatrick                 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
136309467b48Spatrick                        MOP.isImplicit() && MOP.isKill() &&
136409467b48Spatrick                        TRI->regsOverlap(RegToRename, MOP.getReg());
136509467b48Spatrick               })) {
136609467b48Spatrick     LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
136709467b48Spatrick     return false;
136809467b48Spatrick   }
1369097a140dSpatrick   auto canRenameMOP = [TRI](const MachineOperand &MOP) {
1370097a140dSpatrick     if (MOP.isReg()) {
1371097a140dSpatrick       auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1372097a140dSpatrick       // Renaming registers with multiple disjunct sub-registers (e.g. the
1373097a140dSpatrick       // result of a LD3) means that all sub-registers are renamed, potentially
1374097a140dSpatrick       // impacting other instructions we did not check. Bail out.
1375097a140dSpatrick       // Note that this relies on the structure of the AArch64 register file. In
1376097a140dSpatrick       // particular, a subregister cannot be written without overwriting the
1377097a140dSpatrick       // whole register.
1378097a140dSpatrick       if (RegClass->HasDisjunctSubRegs) {
1379097a140dSpatrick         LLVM_DEBUG(
1380097a140dSpatrick             dbgs()
1381097a140dSpatrick             << "  Cannot rename operands with multiple disjunct subregisters ("
1382097a140dSpatrick             << MOP << ")\n");
1383097a140dSpatrick         return false;
1384097a140dSpatrick       }
1385097a140dSpatrick     }
138609467b48Spatrick     return MOP.isImplicit() ||
138709467b48Spatrick            (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
138809467b48Spatrick   };
138909467b48Spatrick 
139009467b48Spatrick   bool FoundDef = false;
139109467b48Spatrick 
139209467b48Spatrick   // For each instruction between FirstMI and the previous def for RegToRename,
139309467b48Spatrick   // we
139409467b48Spatrick   // * check if we can rename RegToRename in this instruction
139509467b48Spatrick   // * collect the registers used and required register classes for RegToRename.
139609467b48Spatrick   std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
139709467b48Spatrick                                                            bool IsDef) {
139809467b48Spatrick     LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
139909467b48Spatrick     // Currently we do not try to rename across frame-setup instructions.
140009467b48Spatrick     if (MI.getFlag(MachineInstr::FrameSetup)) {
140109467b48Spatrick       LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions currently ("
140209467b48Spatrick                         << MI << ")\n");
140309467b48Spatrick       return false;
140409467b48Spatrick     }
140509467b48Spatrick 
140609467b48Spatrick     UsedInBetween.accumulate(MI);
140709467b48Spatrick 
140809467b48Spatrick     // For a definition, check that we can rename the definition and exit the
140909467b48Spatrick     // loop.
141009467b48Spatrick     FoundDef = IsDef;
141109467b48Spatrick 
141209467b48Spatrick     // For defs, check if we can rename the first def of RegToRename.
141309467b48Spatrick     if (FoundDef) {
141409467b48Spatrick       // For some pseudo instructions, we might not generate code in the end
141509467b48Spatrick       // (e.g. KILL) and we would end up without a correct def for the rename
141609467b48Spatrick       // register.
141709467b48Spatrick       // TODO: This might be overly conservative and we could handle those cases
141809467b48Spatrick       // in multiple ways:
141909467b48Spatrick       //       1. Insert an extra copy, to materialize the def.
142009467b48Spatrick       //       2. Skip pseudo-defs until we find an non-pseudo def.
142109467b48Spatrick       if (MI.isPseudo()) {
142209467b48Spatrick         LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI
142309467b48Spatrick                           << "\n");
142409467b48Spatrick         return false;
142509467b48Spatrick       }
142609467b48Spatrick 
142709467b48Spatrick       for (auto &MOP : MI.operands()) {
142809467b48Spatrick         if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
142909467b48Spatrick             !TRI->regsOverlap(MOP.getReg(), RegToRename))
143009467b48Spatrick           continue;
143109467b48Spatrick         if (!canRenameMOP(MOP)) {
143209467b48Spatrick           LLVM_DEBUG(dbgs()
143309467b48Spatrick                      << "  Cannot rename " << MOP << " in " << MI << "\n");
143409467b48Spatrick           return false;
143509467b48Spatrick         }
143609467b48Spatrick         RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
143709467b48Spatrick       }
143809467b48Spatrick       return true;
143909467b48Spatrick     } else {
144009467b48Spatrick       for (auto &MOP : MI.operands()) {
144109467b48Spatrick         if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
144209467b48Spatrick             !TRI->regsOverlap(MOP.getReg(), RegToRename))
144309467b48Spatrick           continue;
144409467b48Spatrick 
144509467b48Spatrick         if (!canRenameMOP(MOP)) {
144609467b48Spatrick           LLVM_DEBUG(dbgs()
144709467b48Spatrick                      << "  Cannot rename " << MOP << " in " << MI << "\n");
144809467b48Spatrick           return false;
144909467b48Spatrick         }
145009467b48Spatrick         RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
145109467b48Spatrick       }
145209467b48Spatrick     }
145309467b48Spatrick     return true;
145409467b48Spatrick   };
145509467b48Spatrick 
145609467b48Spatrick   if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
145709467b48Spatrick     return false;
145809467b48Spatrick 
145909467b48Spatrick   if (!FoundDef) {
146009467b48Spatrick     LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
146109467b48Spatrick     return false;
146209467b48Spatrick   }
146309467b48Spatrick   return true;
146409467b48Spatrick }
146509467b48Spatrick 
1466*d415bd75Srobert // Check if we can find a physical register for renaming \p Reg. This register
1467*d415bd75Srobert // must:
1468*d415bd75Srobert // * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1469*d415bd75Srobert //   defined registers up to the point where the renamed register will be used,
1470*d415bd75Srobert // * not used in \p UsedInBetween; UsedInBetween must contain all accessed
1471*d415bd75Srobert //   registers in the range the rename register will be used,
147209467b48Spatrick // * is available in all used register classes (checked using RequiredClasses).
tryToFindRegisterToRename(const MachineFunction & MF,Register Reg,LiveRegUnits & DefinedInBB,LiveRegUnits & UsedInBetween,SmallPtrSetImpl<const TargetRegisterClass * > & RequiredClasses,const TargetRegisterInfo * TRI)1473*d415bd75Srobert static std::optional<MCPhysReg> tryToFindRegisterToRename(
1474*d415bd75Srobert     const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
147509467b48Spatrick     LiveRegUnits &UsedInBetween,
147609467b48Spatrick     SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
147709467b48Spatrick     const TargetRegisterInfo *TRI) {
1478*d415bd75Srobert   const MachineRegisterInfo &RegInfo = MF.getRegInfo();
147909467b48Spatrick 
148009467b48Spatrick   // Checks if any sub- or super-register of PR is callee saved.
148109467b48Spatrick   auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
148209467b48Spatrick     return any_of(TRI->sub_and_superregs_inclusive(PR),
148309467b48Spatrick                   [&MF, TRI](MCPhysReg SubOrSuper) {
148409467b48Spatrick                     return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
148509467b48Spatrick                   });
148609467b48Spatrick   };
148709467b48Spatrick 
148809467b48Spatrick   // Check if PR or one of its sub- or super-registers can be used for all
148909467b48Spatrick   // required register classes.
149009467b48Spatrick   auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
149109467b48Spatrick     return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
149209467b48Spatrick       return any_of(TRI->sub_and_superregs_inclusive(PR),
149309467b48Spatrick                     [C, TRI](MCPhysReg SubOrSuper) {
149409467b48Spatrick                       return C == TRI->getMinimalPhysRegClass(SubOrSuper);
149509467b48Spatrick                     });
149609467b48Spatrick     });
149709467b48Spatrick   };
149809467b48Spatrick 
1499*d415bd75Srobert   auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
150009467b48Spatrick   for (const MCPhysReg &PR : *RegClass) {
150109467b48Spatrick     if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
150209467b48Spatrick         !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
150309467b48Spatrick         CanBeUsedForAllClasses(PR)) {
150409467b48Spatrick       DefinedInBB.addReg(PR);
150509467b48Spatrick       LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
150609467b48Spatrick                         << "\n");
150709467b48Spatrick       return {PR};
150809467b48Spatrick     }
150909467b48Spatrick   }
151009467b48Spatrick   LLVM_DEBUG(dbgs() << "No rename register found from "
151109467b48Spatrick                     << TRI->getRegClassName(RegClass) << "\n");
1512*d415bd75Srobert   return std::nullopt;
151309467b48Spatrick }
151409467b48Spatrick 
151509467b48Spatrick /// Scan the instructions looking for a load/store that can be combined with the
151609467b48Spatrick /// current instruction into a wider equivalent or a load/store pair.
151709467b48Spatrick MachineBasicBlock::iterator
findMatchingInsn(MachineBasicBlock::iterator I,LdStPairFlags & Flags,unsigned Limit,bool FindNarrowMerge)151809467b48Spatrick AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
151909467b48Spatrick                                       LdStPairFlags &Flags, unsigned Limit,
152009467b48Spatrick                                       bool FindNarrowMerge) {
152109467b48Spatrick   MachineBasicBlock::iterator E = I->getParent()->end();
152209467b48Spatrick   MachineBasicBlock::iterator MBBI = I;
152309467b48Spatrick   MachineBasicBlock::iterator MBBIWithRenameReg;
152409467b48Spatrick   MachineInstr &FirstMI = *I;
1525097a140dSpatrick   MBBI = next_nodbg(MBBI, E);
152609467b48Spatrick 
152709467b48Spatrick   bool MayLoad = FirstMI.mayLoad();
152873471bf0Spatrick   bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
152909467b48Spatrick   Register Reg = getLdStRegOp(FirstMI).getReg();
1530*d415bd75Srobert   Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
1531*d415bd75Srobert   int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
153209467b48Spatrick   int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
153309467b48Spatrick   bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
153409467b48Spatrick 
1535*d415bd75Srobert   std::optional<bool> MaybeCanRename;
153609467b48Spatrick   if (!EnableRenaming)
153709467b48Spatrick     MaybeCanRename = {false};
153809467b48Spatrick 
153909467b48Spatrick   SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
154009467b48Spatrick   LiveRegUnits UsedInBetween;
154109467b48Spatrick   UsedInBetween.init(*TRI);
154209467b48Spatrick 
154309467b48Spatrick   Flags.clearRenameReg();
154409467b48Spatrick 
154509467b48Spatrick   // Track which register units have been modified and used between the first
154609467b48Spatrick   // insn (inclusive) and the second insn.
154709467b48Spatrick   ModifiedRegUnits.clear();
154809467b48Spatrick   UsedRegUnits.clear();
154909467b48Spatrick 
155009467b48Spatrick   // Remember any instructions that read/write memory between FirstMI and MI.
155109467b48Spatrick   SmallVector<MachineInstr *, 4> MemInsns;
155209467b48Spatrick 
1553097a140dSpatrick   for (unsigned Count = 0; MBBI != E && Count < Limit;
1554097a140dSpatrick        MBBI = next_nodbg(MBBI, E)) {
155509467b48Spatrick     MachineInstr &MI = *MBBI;
155609467b48Spatrick 
155709467b48Spatrick     UsedInBetween.accumulate(MI);
155809467b48Spatrick 
155909467b48Spatrick     // Don't count transient instructions towards the search limit since there
156009467b48Spatrick     // may be different numbers of them if e.g. debug information is present.
156109467b48Spatrick     if (!MI.isTransient())
156209467b48Spatrick       ++Count;
156309467b48Spatrick 
156409467b48Spatrick     Flags.setSExtIdx(-1);
156509467b48Spatrick     if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1566*d415bd75Srobert         AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
156709467b48Spatrick       assert(MI.mayLoadOrStore() && "Expected memory operation.");
156809467b48Spatrick       // If we've found another instruction with the same opcode, check to see
156909467b48Spatrick       // if the base and offset are compatible with our starting instruction.
157009467b48Spatrick       // These instructions all have scaled immediate operands, so we just
157109467b48Spatrick       // check for +1/-1. Make sure to check the new instruction offset is
157209467b48Spatrick       // actually an immediate and not a symbolic reference destined for
157309467b48Spatrick       // a relocation.
1574*d415bd75Srobert       Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
1575*d415bd75Srobert       int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
157673471bf0Spatrick       bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
157709467b48Spatrick       if (IsUnscaled != MIIsUnscaled) {
157809467b48Spatrick         // We're trying to pair instructions that differ in how they are scaled.
157909467b48Spatrick         // If FirstMI is scaled then scale the offset of MI accordingly.
158009467b48Spatrick         // Otherwise, do the opposite (i.e., make MI's offset unscaled).
158109467b48Spatrick         int MemSize = TII->getMemScale(MI);
158209467b48Spatrick         if (MIIsUnscaled) {
158309467b48Spatrick           // If the unscaled offset isn't a multiple of the MemSize, we can't
158409467b48Spatrick           // pair the operations together: bail and keep looking.
158509467b48Spatrick           if (MIOffset % MemSize) {
158609467b48Spatrick             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
158709467b48Spatrick                                               UsedRegUnits, TRI);
158809467b48Spatrick             MemInsns.push_back(&MI);
158909467b48Spatrick             continue;
159009467b48Spatrick           }
159109467b48Spatrick           MIOffset /= MemSize;
159209467b48Spatrick         } else {
159309467b48Spatrick           MIOffset *= MemSize;
159409467b48Spatrick         }
159509467b48Spatrick       }
159609467b48Spatrick 
159773471bf0Spatrick       bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
159873471bf0Spatrick 
159973471bf0Spatrick       if (BaseReg == MIBaseReg) {
160073471bf0Spatrick         // If the offset of the second ld/st is not equal to the size of the
160173471bf0Spatrick         // destination register it can’t be paired with a pre-index ld/st
160273471bf0Spatrick         // pair. Additionally if the base reg is used or modified the operations
160373471bf0Spatrick         // can't be paired: bail and keep looking.
160473471bf0Spatrick         if (IsPreLdSt) {
160573471bf0Spatrick           bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
1606*d415bd75Srobert           bool IsBaseRegUsed = !UsedRegUnits.available(
1607*d415bd75Srobert               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
1608*d415bd75Srobert           bool IsBaseRegModified = !ModifiedRegUnits.available(
1609*d415bd75Srobert               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
161073471bf0Spatrick           // If the stored value and the address of the second instruction is
161173471bf0Spatrick           // the same, it needs to be using the updated register and therefore
161273471bf0Spatrick           // it must not be folded.
161373471bf0Spatrick           bool IsMIRegTheSame =
1614*d415bd75Srobert               TRI->regsOverlap(getLdStRegOp(MI).getReg(),
1615*d415bd75Srobert                                AArch64InstrInfo::getLdStBaseOp(MI).getReg());
161673471bf0Spatrick           if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
161773471bf0Spatrick               IsMIRegTheSame) {
161873471bf0Spatrick             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
161973471bf0Spatrick                                               UsedRegUnits, TRI);
162073471bf0Spatrick             MemInsns.push_back(&MI);
162173471bf0Spatrick             continue;
162273471bf0Spatrick           }
162373471bf0Spatrick         } else {
162473471bf0Spatrick           if ((Offset != MIOffset + OffsetStride) &&
162573471bf0Spatrick               (Offset + OffsetStride != MIOffset)) {
162673471bf0Spatrick             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
162773471bf0Spatrick                                               UsedRegUnits, TRI);
162873471bf0Spatrick             MemInsns.push_back(&MI);
162973471bf0Spatrick             continue;
163073471bf0Spatrick           }
163173471bf0Spatrick         }
163273471bf0Spatrick 
163309467b48Spatrick         int MinOffset = Offset < MIOffset ? Offset : MIOffset;
163409467b48Spatrick         if (FindNarrowMerge) {
163509467b48Spatrick           // If the alignment requirements of the scaled wide load/store
163609467b48Spatrick           // instruction can't express the offset of the scaled narrow input,
163709467b48Spatrick           // bail and keep looking. For promotable zero stores, allow only when
163809467b48Spatrick           // the stored value is the same (i.e., WZR).
163909467b48Spatrick           if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
164009467b48Spatrick               (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
164109467b48Spatrick             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
164209467b48Spatrick                                               UsedRegUnits, TRI);
164309467b48Spatrick             MemInsns.push_back(&MI);
164409467b48Spatrick             continue;
164509467b48Spatrick           }
164609467b48Spatrick         } else {
164709467b48Spatrick           // Pairwise instructions have a 7-bit signed offset field. Single
164809467b48Spatrick           // insns have a 12-bit unsigned offset field.  If the resultant
164909467b48Spatrick           // immediate offset of merging these instructions is out of range for
165009467b48Spatrick           // a pairwise instruction, bail and keep looking.
165109467b48Spatrick           if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
165209467b48Spatrick             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
165309467b48Spatrick                                               UsedRegUnits, TRI);
165409467b48Spatrick             MemInsns.push_back(&MI);
165509467b48Spatrick             continue;
165609467b48Spatrick           }
165709467b48Spatrick           // If the alignment requirements of the paired (scaled) instruction
165809467b48Spatrick           // can't express the offset of the unscaled input, bail and keep
165909467b48Spatrick           // looking.
166009467b48Spatrick           if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
166109467b48Spatrick             LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
166209467b48Spatrick                                               UsedRegUnits, TRI);
166309467b48Spatrick             MemInsns.push_back(&MI);
166409467b48Spatrick             continue;
166509467b48Spatrick           }
166609467b48Spatrick         }
166773471bf0Spatrick         // If the destination register of one load is the same register or a
166873471bf0Spatrick         // sub/super register of the other load, bail and keep looking. A
166973471bf0Spatrick         // load-pair instruction with both destination registers the same is
167073471bf0Spatrick         // UNPREDICTABLE and will result in an exception.
167173471bf0Spatrick         if (MayLoad &&
167273471bf0Spatrick             TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
167309467b48Spatrick           LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
167409467b48Spatrick                                             TRI);
167509467b48Spatrick           MemInsns.push_back(&MI);
167609467b48Spatrick           continue;
167709467b48Spatrick         }
167809467b48Spatrick 
167973471bf0Spatrick         // If the BaseReg has been modified, then we cannot do the optimization.
168073471bf0Spatrick         // For example, in the following pattern
168173471bf0Spatrick         //   ldr x1 [x2]
168273471bf0Spatrick         //   ldr x2 [x3]
168373471bf0Spatrick         //   ldr x4 [x2, #8],
168473471bf0Spatrick         // the first and third ldr cannot be converted to ldp x1, x4, [x2]
168573471bf0Spatrick         if (!ModifiedRegUnits.available(BaseReg))
168673471bf0Spatrick           return E;
168773471bf0Spatrick 
168809467b48Spatrick         // If the Rt of the second instruction was not modified or used between
168909467b48Spatrick         // the two instructions and none of the instructions between the second
169009467b48Spatrick         // and first alias with the second, we can combine the second into the
169109467b48Spatrick         // first.
169209467b48Spatrick         if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
169309467b48Spatrick             !(MI.mayLoad() &&
169409467b48Spatrick               !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
169509467b48Spatrick             !mayAlias(MI, MemInsns, AA)) {
169609467b48Spatrick 
169709467b48Spatrick           Flags.setMergeForward(false);
169809467b48Spatrick           Flags.clearRenameReg();
169909467b48Spatrick           return MBBI;
170009467b48Spatrick         }
170109467b48Spatrick 
170209467b48Spatrick         // Likewise, if the Rt of the first instruction is not modified or used
170309467b48Spatrick         // between the two instructions and none of the instructions between the
170409467b48Spatrick         // first and the second alias with the first, we can combine the first
170509467b48Spatrick         // into the second.
170609467b48Spatrick         if (!(MayLoad &&
170709467b48Spatrick               !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
170809467b48Spatrick             !mayAlias(FirstMI, MemInsns, AA)) {
170909467b48Spatrick 
171009467b48Spatrick           if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
171109467b48Spatrick             Flags.setMergeForward(true);
171209467b48Spatrick             Flags.clearRenameReg();
171309467b48Spatrick             return MBBI;
171409467b48Spatrick           }
171509467b48Spatrick 
171609467b48Spatrick           if (DebugCounter::shouldExecute(RegRenamingCounter)) {
171709467b48Spatrick             if (!MaybeCanRename)
171809467b48Spatrick               MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
171909467b48Spatrick                                                  RequiredClasses, TRI)};
172009467b48Spatrick 
172109467b48Spatrick             if (*MaybeCanRename) {
1722*d415bd75Srobert               std::optional<MCPhysReg> MaybeRenameReg =
1723*d415bd75Srobert                   tryToFindRegisterToRename(*FirstMI.getParent()->getParent(),
1724*d415bd75Srobert                                             Reg, DefinedInBB, UsedInBetween,
1725*d415bd75Srobert                                             RequiredClasses, TRI);
172609467b48Spatrick               if (MaybeRenameReg) {
172709467b48Spatrick                 Flags.setRenameReg(*MaybeRenameReg);
172809467b48Spatrick                 Flags.setMergeForward(true);
172909467b48Spatrick                 MBBIWithRenameReg = MBBI;
173009467b48Spatrick               }
173109467b48Spatrick             }
173209467b48Spatrick           }
173309467b48Spatrick         }
173409467b48Spatrick         // Unable to combine these instructions due to interference in between.
173509467b48Spatrick         // Keep looking.
173609467b48Spatrick       }
173709467b48Spatrick     }
173809467b48Spatrick 
173909467b48Spatrick     if (Flags.getRenameReg())
174009467b48Spatrick       return MBBIWithRenameReg;
174109467b48Spatrick 
174209467b48Spatrick     // If the instruction wasn't a matching load or store.  Stop searching if we
174309467b48Spatrick     // encounter a call instruction that might modify memory.
174409467b48Spatrick     if (MI.isCall())
174509467b48Spatrick       return E;
174609467b48Spatrick 
174709467b48Spatrick     // Update modified / uses register units.
174809467b48Spatrick     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
174909467b48Spatrick 
175009467b48Spatrick     // Otherwise, if the base register is modified, we have no match, so
175109467b48Spatrick     // return early.
175209467b48Spatrick     if (!ModifiedRegUnits.available(BaseReg))
175309467b48Spatrick       return E;
175409467b48Spatrick 
175509467b48Spatrick     // Update list of instructions that read/write memory.
175609467b48Spatrick     if (MI.mayLoadOrStore())
175709467b48Spatrick       MemInsns.push_back(&MI);
175809467b48Spatrick   }
175909467b48Spatrick   return E;
176009467b48Spatrick }
176109467b48Spatrick 
1762*d415bd75Srobert static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr & MI,MachineBasicBlock::iterator MaybeCFI)1763*d415bd75Srobert maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
1764*d415bd75Srobert   auto End = MI.getParent()->end();
1765*d415bd75Srobert   if (MaybeCFI == End ||
1766*d415bd75Srobert       MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
1767*d415bd75Srobert       !(MI.getFlag(MachineInstr::FrameSetup) ||
1768*d415bd75Srobert         MI.getFlag(MachineInstr::FrameDestroy)) ||
1769*d415bd75Srobert       AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
1770*d415bd75Srobert     return End;
1771*d415bd75Srobert 
1772*d415bd75Srobert   const MachineFunction &MF = *MI.getParent()->getParent();
1773*d415bd75Srobert   unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
1774*d415bd75Srobert   const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
1775*d415bd75Srobert   switch (CFI.getOperation()) {
1776*d415bd75Srobert   case MCCFIInstruction::OpDefCfa:
1777*d415bd75Srobert   case MCCFIInstruction::OpDefCfaOffset:
1778*d415bd75Srobert     return MaybeCFI;
1779*d415bd75Srobert   default:
1780*d415bd75Srobert     return End;
1781*d415bd75Srobert   }
1782*d415bd75Srobert }
1783*d415bd75Srobert 
178409467b48Spatrick MachineBasicBlock::iterator
mergeUpdateInsn(MachineBasicBlock::iterator I,MachineBasicBlock::iterator Update,bool IsPreIdx)178509467b48Spatrick AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
178609467b48Spatrick                                      MachineBasicBlock::iterator Update,
178709467b48Spatrick                                      bool IsPreIdx) {
178809467b48Spatrick   assert((Update->getOpcode() == AArch64::ADDXri ||
178909467b48Spatrick           Update->getOpcode() == AArch64::SUBXri) &&
179009467b48Spatrick          "Unexpected base register update instruction to merge!");
1791097a140dSpatrick   MachineBasicBlock::iterator E = I->getParent()->end();
1792097a140dSpatrick   MachineBasicBlock::iterator NextI = next_nodbg(I, E);
1793*d415bd75Srobert 
1794*d415bd75Srobert   // If updating the SP and the following instruction is CFA offset related CFI
1795*d415bd75Srobert   // instruction move it after the merged instruction.
1796*d415bd75Srobert   MachineBasicBlock::iterator CFI =
1797*d415bd75Srobert       IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
1798*d415bd75Srobert 
179909467b48Spatrick   // Return the instruction following the merged instruction, which is
180009467b48Spatrick   // the instruction following our unmerged load. Unless that's the add/sub
180109467b48Spatrick   // instruction we're merging, in which case it's the one after that.
1802097a140dSpatrick   if (NextI == Update)
1803097a140dSpatrick     NextI = next_nodbg(NextI, E);
180409467b48Spatrick 
180509467b48Spatrick   int Value = Update->getOperand(2).getImm();
180609467b48Spatrick   assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
180709467b48Spatrick          "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
180809467b48Spatrick   if (Update->getOpcode() == AArch64::SUBXri)
180909467b48Spatrick     Value = -Value;
181009467b48Spatrick 
181109467b48Spatrick   unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
181209467b48Spatrick                              : getPostIndexedOpcode(I->getOpcode());
181309467b48Spatrick   MachineInstrBuilder MIB;
181409467b48Spatrick   int Scale, MinOffset, MaxOffset;
181509467b48Spatrick   getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
1816*d415bd75Srobert   if (!AArch64InstrInfo::isPairedLdSt(*I)) {
181709467b48Spatrick     // Non-paired instruction.
181809467b48Spatrick     MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
181909467b48Spatrick               .add(getLdStRegOp(*Update))
182009467b48Spatrick               .add(getLdStRegOp(*I))
1821*d415bd75Srobert               .add(AArch64InstrInfo::getLdStBaseOp(*I))
182209467b48Spatrick               .addImm(Value / Scale)
182309467b48Spatrick               .setMemRefs(I->memoperands())
182409467b48Spatrick               .setMIFlags(I->mergeFlagsWith(*Update));
182509467b48Spatrick   } else {
182609467b48Spatrick     // Paired instruction.
182709467b48Spatrick     MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
182809467b48Spatrick               .add(getLdStRegOp(*Update))
182909467b48Spatrick               .add(getLdStRegOp(*I, 0))
183009467b48Spatrick               .add(getLdStRegOp(*I, 1))
1831*d415bd75Srobert               .add(AArch64InstrInfo::getLdStBaseOp(*I))
183209467b48Spatrick               .addImm(Value / Scale)
183309467b48Spatrick               .setMemRefs(I->memoperands())
183409467b48Spatrick               .setMIFlags(I->mergeFlagsWith(*Update));
183509467b48Spatrick   }
1836*d415bd75Srobert   if (CFI != E) {
1837*d415bd75Srobert     MachineBasicBlock *MBB = I->getParent();
1838*d415bd75Srobert     MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
1839*d415bd75Srobert   }
184009467b48Spatrick 
184109467b48Spatrick   if (IsPreIdx) {
184209467b48Spatrick     ++NumPreFolded;
184309467b48Spatrick     LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
184409467b48Spatrick   } else {
184509467b48Spatrick     ++NumPostFolded;
184609467b48Spatrick     LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
184709467b48Spatrick   }
184809467b48Spatrick   LLVM_DEBUG(dbgs() << "    Replacing instructions:\n    ");
184909467b48Spatrick   LLVM_DEBUG(I->print(dbgs()));
185009467b48Spatrick   LLVM_DEBUG(dbgs() << "    ");
185109467b48Spatrick   LLVM_DEBUG(Update->print(dbgs()));
185209467b48Spatrick   LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
185309467b48Spatrick   LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
185409467b48Spatrick   LLVM_DEBUG(dbgs() << "\n");
185509467b48Spatrick 
185609467b48Spatrick   // Erase the old instructions for the block.
185709467b48Spatrick   I->eraseFromParent();
185809467b48Spatrick   Update->eraseFromParent();
185909467b48Spatrick 
186009467b48Spatrick   return NextI;
186109467b48Spatrick }
186209467b48Spatrick 
isMatchingUpdateInsn(MachineInstr & MemMI,MachineInstr & MI,unsigned BaseReg,int Offset)186309467b48Spatrick bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
186409467b48Spatrick                                                MachineInstr &MI,
186509467b48Spatrick                                                unsigned BaseReg, int Offset) {
186609467b48Spatrick   switch (MI.getOpcode()) {
186709467b48Spatrick   default:
186809467b48Spatrick     break;
186909467b48Spatrick   case AArch64::SUBXri:
187009467b48Spatrick   case AArch64::ADDXri:
187109467b48Spatrick     // Make sure it's a vanilla immediate operand, not a relocation or
187209467b48Spatrick     // anything else we can't handle.
187309467b48Spatrick     if (!MI.getOperand(2).isImm())
187409467b48Spatrick       break;
187509467b48Spatrick     // Watch out for 1 << 12 shifted value.
187609467b48Spatrick     if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
187709467b48Spatrick       break;
187809467b48Spatrick 
187909467b48Spatrick     // The update instruction source and destination register must be the
188009467b48Spatrick     // same as the load/store base register.
188109467b48Spatrick     if (MI.getOperand(0).getReg() != BaseReg ||
188209467b48Spatrick         MI.getOperand(1).getReg() != BaseReg)
188309467b48Spatrick       break;
188409467b48Spatrick 
188509467b48Spatrick     int UpdateOffset = MI.getOperand(2).getImm();
188609467b48Spatrick     if (MI.getOpcode() == AArch64::SUBXri)
188709467b48Spatrick       UpdateOffset = -UpdateOffset;
188809467b48Spatrick 
188909467b48Spatrick     // The immediate must be a multiple of the scaling factor of the pre/post
189009467b48Spatrick     // indexed instruction.
189109467b48Spatrick     int Scale, MinOffset, MaxOffset;
189209467b48Spatrick     getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
189309467b48Spatrick     if (UpdateOffset % Scale != 0)
189409467b48Spatrick       break;
189509467b48Spatrick 
189609467b48Spatrick     // Scaled offset must fit in the instruction immediate.
189709467b48Spatrick     int ScaledOffset = UpdateOffset / Scale;
189809467b48Spatrick     if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
189909467b48Spatrick       break;
190009467b48Spatrick 
190109467b48Spatrick     // If we have a non-zero Offset, we check that it matches the amount
190209467b48Spatrick     // we're adding to the register.
190309467b48Spatrick     if (!Offset || Offset == UpdateOffset)
190409467b48Spatrick       return true;
190509467b48Spatrick     break;
190609467b48Spatrick   }
190709467b48Spatrick   return false;
190809467b48Spatrick }
190909467b48Spatrick 
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,int UnscaledOffset,unsigned Limit)191009467b48Spatrick MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
191109467b48Spatrick     MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
191209467b48Spatrick   MachineBasicBlock::iterator E = I->getParent()->end();
191309467b48Spatrick   MachineInstr &MemMI = *I;
191409467b48Spatrick   MachineBasicBlock::iterator MBBI = I;
191509467b48Spatrick 
1916*d415bd75Srobert   Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
1917*d415bd75Srobert   int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
1918*d415bd75Srobert                          TII->getMemScale(MemMI);
191909467b48Spatrick 
192009467b48Spatrick   // Scan forward looking for post-index opportunities.  Updating instructions
192109467b48Spatrick   // can't be formed if the memory instruction doesn't have the offset we're
192209467b48Spatrick   // looking for.
192309467b48Spatrick   if (MIUnscaledOffset != UnscaledOffset)
192409467b48Spatrick     return E;
192509467b48Spatrick 
192609467b48Spatrick   // If the base register overlaps a source/destination register, we can't
192709467b48Spatrick   // merge the update. This does not apply to tag store instructions which
192809467b48Spatrick   // ignore the address part of the source register.
192909467b48Spatrick   // This does not apply to STGPi as well, which does not have unpredictable
193009467b48Spatrick   // behavior in this case unlike normal stores, and always performs writeback
193109467b48Spatrick   // after reading the source register value.
193209467b48Spatrick   if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1933*d415bd75Srobert     bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
193409467b48Spatrick     for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
193509467b48Spatrick       Register DestReg = getLdStRegOp(MemMI, i).getReg();
193609467b48Spatrick       if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
193709467b48Spatrick         return E;
193809467b48Spatrick     }
193909467b48Spatrick   }
194009467b48Spatrick 
194109467b48Spatrick   // Track which register units have been modified and used between the first
194209467b48Spatrick   // insn (inclusive) and the second insn.
194309467b48Spatrick   ModifiedRegUnits.clear();
194409467b48Spatrick   UsedRegUnits.clear();
1945097a140dSpatrick   MBBI = next_nodbg(MBBI, E);
1946097a140dSpatrick 
1947097a140dSpatrick   // We can't post-increment the stack pointer if any instruction between
1948097a140dSpatrick   // the memory access (I) and the increment (MBBI) can access the memory
1949097a140dSpatrick   // region defined by [SP, MBBI].
1950097a140dSpatrick   const bool BaseRegSP = BaseReg == AArch64::SP;
195173471bf0Spatrick   if (BaseRegSP && needsWinCFI(I->getMF())) {
1952097a140dSpatrick     // FIXME: For now, we always block the optimization over SP in windows
1953097a140dSpatrick     // targets as it requires to adjust the unwind/debug info, messing up
1954097a140dSpatrick     // the unwind info can actually cause a miscompile.
1955097a140dSpatrick     return E;
1956097a140dSpatrick   }
1957097a140dSpatrick 
1958097a140dSpatrick   for (unsigned Count = 0; MBBI != E && Count < Limit;
1959097a140dSpatrick        MBBI = next_nodbg(MBBI, E)) {
196009467b48Spatrick     MachineInstr &MI = *MBBI;
196109467b48Spatrick 
196209467b48Spatrick     // Don't count transient instructions towards the search limit since there
196309467b48Spatrick     // may be different numbers of them if e.g. debug information is present.
196409467b48Spatrick     if (!MI.isTransient())
196509467b48Spatrick       ++Count;
196609467b48Spatrick 
196709467b48Spatrick     // If we found a match, return it.
196809467b48Spatrick     if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
196909467b48Spatrick       return MBBI;
197009467b48Spatrick 
197109467b48Spatrick     // Update the status of what the instruction clobbered and used.
197209467b48Spatrick     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
197309467b48Spatrick 
197409467b48Spatrick     // Otherwise, if the base register is used or modified, we have no match, so
197509467b48Spatrick     // return early.
1976097a140dSpatrick     // If we are optimizing SP, do not allow instructions that may load or store
1977097a140dSpatrick     // in between the load and the optimized value update.
197809467b48Spatrick     if (!ModifiedRegUnits.available(BaseReg) ||
1979097a140dSpatrick         !UsedRegUnits.available(BaseReg) ||
1980097a140dSpatrick         (BaseRegSP && MBBI->mayLoadOrStore()))
198109467b48Spatrick       return E;
198209467b48Spatrick   }
198309467b48Spatrick   return E;
198409467b48Spatrick }
198509467b48Spatrick 
findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I,unsigned Limit)198609467b48Spatrick MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
198709467b48Spatrick     MachineBasicBlock::iterator I, unsigned Limit) {
198809467b48Spatrick   MachineBasicBlock::iterator B = I->getParent()->begin();
198909467b48Spatrick   MachineBasicBlock::iterator E = I->getParent()->end();
199009467b48Spatrick   MachineInstr &MemMI = *I;
199109467b48Spatrick   MachineBasicBlock::iterator MBBI = I;
199273471bf0Spatrick   MachineFunction &MF = *MemMI.getMF();
199309467b48Spatrick 
1994*d415bd75Srobert   Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
1995*d415bd75Srobert   int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
199609467b48Spatrick 
199709467b48Spatrick   // If the load/store is the first instruction in the block, there's obviously
199809467b48Spatrick   // not any matching update. Ditto if the memory offset isn't zero.
199909467b48Spatrick   if (MBBI == B || Offset != 0)
200009467b48Spatrick     return E;
200109467b48Spatrick   // If the base register overlaps a destination register, we can't
200209467b48Spatrick   // merge the update.
200309467b48Spatrick   if (!isTagStore(MemMI)) {
2004*d415bd75Srobert     bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
200509467b48Spatrick     for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
200609467b48Spatrick       Register DestReg = getLdStRegOp(MemMI, i).getReg();
200709467b48Spatrick       if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
200809467b48Spatrick         return E;
200909467b48Spatrick     }
201009467b48Spatrick   }
201109467b48Spatrick 
201273471bf0Spatrick   const bool BaseRegSP = BaseReg == AArch64::SP;
201373471bf0Spatrick   if (BaseRegSP && needsWinCFI(I->getMF())) {
201473471bf0Spatrick     // FIXME: For now, we always block the optimization over SP in windows
201573471bf0Spatrick     // targets as it requires to adjust the unwind/debug info, messing up
201673471bf0Spatrick     // the unwind info can actually cause a miscompile.
201773471bf0Spatrick     return E;
201873471bf0Spatrick   }
201973471bf0Spatrick 
202073471bf0Spatrick   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
202173471bf0Spatrick   unsigned RedZoneSize =
202273471bf0Spatrick       Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
202373471bf0Spatrick 
202409467b48Spatrick   // Track which register units have been modified and used between the first
202509467b48Spatrick   // insn (inclusive) and the second insn.
202609467b48Spatrick   ModifiedRegUnits.clear();
202709467b48Spatrick   UsedRegUnits.clear();
202809467b48Spatrick   unsigned Count = 0;
202973471bf0Spatrick   bool MemAcessBeforeSPPreInc = false;
203009467b48Spatrick   do {
2031097a140dSpatrick     MBBI = prev_nodbg(MBBI, B);
203209467b48Spatrick     MachineInstr &MI = *MBBI;
203309467b48Spatrick 
203409467b48Spatrick     // Don't count transient instructions towards the search limit since there
203509467b48Spatrick     // may be different numbers of them if e.g. debug information is present.
203609467b48Spatrick     if (!MI.isTransient())
203709467b48Spatrick       ++Count;
203809467b48Spatrick 
203909467b48Spatrick     // If we found a match, return it.
204073471bf0Spatrick     if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
204173471bf0Spatrick       // Check that the update value is within our red zone limit (which may be
204273471bf0Spatrick       // zero).
204373471bf0Spatrick       if (MemAcessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
204473471bf0Spatrick         return E;
204509467b48Spatrick       return MBBI;
204673471bf0Spatrick     }
204709467b48Spatrick 
204809467b48Spatrick     // Update the status of what the instruction clobbered and used.
204909467b48Spatrick     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
205009467b48Spatrick 
205109467b48Spatrick     // Otherwise, if the base register is used or modified, we have no match, so
205209467b48Spatrick     // return early.
205309467b48Spatrick     if (!ModifiedRegUnits.available(BaseReg) ||
205409467b48Spatrick         !UsedRegUnits.available(BaseReg))
205509467b48Spatrick       return E;
205673471bf0Spatrick     // Keep track if we have a memory access before an SP pre-increment, in this
205773471bf0Spatrick     // case we need to validate later that the update amount respects the red
205873471bf0Spatrick     // zone.
205973471bf0Spatrick     if (BaseRegSP && MBBI->mayLoadOrStore())
206073471bf0Spatrick       MemAcessBeforeSPPreInc = true;
206109467b48Spatrick   } while (MBBI != B && Count < Limit);
206209467b48Spatrick   return E;
206309467b48Spatrick }
206409467b48Spatrick 
tryToPromoteLoadFromStore(MachineBasicBlock::iterator & MBBI)206509467b48Spatrick bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
206609467b48Spatrick     MachineBasicBlock::iterator &MBBI) {
206709467b48Spatrick   MachineInstr &MI = *MBBI;
206809467b48Spatrick   // If this is a volatile load, don't mess with it.
206909467b48Spatrick   if (MI.hasOrderedMemoryRef())
207009467b48Spatrick     return false;
207109467b48Spatrick 
2072*d415bd75Srobert   if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2073*d415bd75Srobert     return false;
2074*d415bd75Srobert 
207509467b48Spatrick   // Make sure this is a reg+imm.
207609467b48Spatrick   // FIXME: It is possible to extend it to handle reg+reg cases.
2077*d415bd75Srobert   if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
207809467b48Spatrick     return false;
207909467b48Spatrick 
208009467b48Spatrick   // Look backward up to LdStLimit instructions.
208109467b48Spatrick   MachineBasicBlock::iterator StoreI;
208209467b48Spatrick   if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
208309467b48Spatrick     ++NumLoadsFromStoresPromoted;
208409467b48Spatrick     // Promote the load. Keeping the iterator straight is a
208509467b48Spatrick     // pain, so we let the merge routine tell us what the next instruction
208609467b48Spatrick     // is after it's done mucking about.
208709467b48Spatrick     MBBI = promoteLoadFromStore(MBBI, StoreI);
208809467b48Spatrick     return true;
208909467b48Spatrick   }
209009467b48Spatrick   return false;
209109467b48Spatrick }
209209467b48Spatrick 
209309467b48Spatrick // Merge adjacent zero stores into a wider store.
tryToMergeZeroStInst(MachineBasicBlock::iterator & MBBI)209409467b48Spatrick bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
209509467b48Spatrick     MachineBasicBlock::iterator &MBBI) {
209609467b48Spatrick   assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
209709467b48Spatrick   MachineInstr &MI = *MBBI;
209809467b48Spatrick   MachineBasicBlock::iterator E = MI.getParent()->end();
209909467b48Spatrick 
210009467b48Spatrick   if (!TII->isCandidateToMergeOrPair(MI))
210109467b48Spatrick     return false;
210209467b48Spatrick 
210309467b48Spatrick   // Look ahead up to LdStLimit instructions for a mergable instruction.
210409467b48Spatrick   LdStPairFlags Flags;
210509467b48Spatrick   MachineBasicBlock::iterator MergeMI =
210609467b48Spatrick       findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
210709467b48Spatrick   if (MergeMI != E) {
210809467b48Spatrick     ++NumZeroStoresPromoted;
210909467b48Spatrick 
211009467b48Spatrick     // Keeping the iterator straight is a pain, so we let the merge routine tell
211109467b48Spatrick     // us what the next instruction is after it's done mucking about.
211209467b48Spatrick     MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
211309467b48Spatrick     return true;
211409467b48Spatrick   }
211509467b48Spatrick   return false;
211609467b48Spatrick }
211709467b48Spatrick 
211809467b48Spatrick // Find loads and stores that can be merged into a single load or store pair
211909467b48Spatrick // instruction.
tryToPairLdStInst(MachineBasicBlock::iterator & MBBI)212009467b48Spatrick bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
212109467b48Spatrick   MachineInstr &MI = *MBBI;
212209467b48Spatrick   MachineBasicBlock::iterator E = MI.getParent()->end();
212309467b48Spatrick 
212409467b48Spatrick   if (!TII->isCandidateToMergeOrPair(MI))
212509467b48Spatrick     return false;
212609467b48Spatrick 
212709467b48Spatrick   // Early exit if the offset is not possible to match. (6 bits of positive
212809467b48Spatrick   // range, plus allow an extra one in case we find a later insn that matches
212909467b48Spatrick   // with Offset-1)
213073471bf0Spatrick   bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2131*d415bd75Srobert   int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
213209467b48Spatrick   int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
213309467b48Spatrick   // Allow one more for offset.
213409467b48Spatrick   if (Offset > 0)
213509467b48Spatrick     Offset -= OffsetStride;
213609467b48Spatrick   if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
213709467b48Spatrick     return false;
213809467b48Spatrick 
213909467b48Spatrick   // Look ahead up to LdStLimit instructions for a pairable instruction.
214009467b48Spatrick   LdStPairFlags Flags;
214109467b48Spatrick   MachineBasicBlock::iterator Paired =
214209467b48Spatrick       findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
214309467b48Spatrick   if (Paired != E) {
214409467b48Spatrick     ++NumPairCreated;
214573471bf0Spatrick     if (TII->hasUnscaledLdStOffset(MI))
214609467b48Spatrick       ++NumUnscaledPairCreated;
214709467b48Spatrick     // Keeping the iterator straight is a pain, so we let the merge routine tell
214809467b48Spatrick     // us what the next instruction is after it's done mucking about.
214909467b48Spatrick     auto Prev = std::prev(MBBI);
215009467b48Spatrick     MBBI = mergePairedInsns(MBBI, Paired, Flags);
215109467b48Spatrick     // Collect liveness info for instructions between Prev and the new position
215209467b48Spatrick     // MBBI.
215309467b48Spatrick     for (auto I = std::next(Prev); I != MBBI; I++)
215409467b48Spatrick       updateDefinedRegisters(*I, DefinedInBB, TRI);
215509467b48Spatrick 
215609467b48Spatrick     return true;
215709467b48Spatrick   }
215809467b48Spatrick   return false;
215909467b48Spatrick }
216009467b48Spatrick 
tryToMergeLdStUpdate(MachineBasicBlock::iterator & MBBI)216109467b48Spatrick bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
216209467b48Spatrick     (MachineBasicBlock::iterator &MBBI) {
216309467b48Spatrick   MachineInstr &MI = *MBBI;
216409467b48Spatrick   MachineBasicBlock::iterator E = MI.getParent()->end();
216509467b48Spatrick   MachineBasicBlock::iterator Update;
216609467b48Spatrick 
216709467b48Spatrick   // Look forward to try to form a post-index instruction. For example,
216809467b48Spatrick   // ldr x0, [x20]
216909467b48Spatrick   // add x20, x20, #32
217009467b48Spatrick   //   merged into:
217109467b48Spatrick   // ldr x0, [x20], #32
217209467b48Spatrick   Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
217309467b48Spatrick   if (Update != E) {
217409467b48Spatrick     // Merge the update into the ld/st.
217509467b48Spatrick     MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
217609467b48Spatrick     return true;
217709467b48Spatrick   }
217809467b48Spatrick 
217909467b48Spatrick   // Don't know how to handle unscaled pre/post-index versions below, so bail.
218073471bf0Spatrick   if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
218109467b48Spatrick     return false;
218209467b48Spatrick 
218309467b48Spatrick   // Look back to try to find a pre-index instruction. For example,
218409467b48Spatrick   // add x0, x0, #8
218509467b48Spatrick   // ldr x1, [x0]
218609467b48Spatrick   //   merged into:
218709467b48Spatrick   // ldr x1, [x0, #8]!
218809467b48Spatrick   Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
218909467b48Spatrick   if (Update != E) {
219009467b48Spatrick     // Merge the update into the ld/st.
219109467b48Spatrick     MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
219209467b48Spatrick     return true;
219309467b48Spatrick   }
219409467b48Spatrick 
219509467b48Spatrick   // The immediate in the load/store is scaled by the size of the memory
219609467b48Spatrick   // operation. The immediate in the add we're looking for,
219709467b48Spatrick   // however, is not, so adjust here.
2198*d415bd75Srobert   int UnscaledOffset =
2199*d415bd75Srobert       AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
220009467b48Spatrick 
220109467b48Spatrick   // Look forward to try to find a pre-index instruction. For example,
220209467b48Spatrick   // ldr x1, [x0, #64]
220309467b48Spatrick   // add x0, x0, #64
220409467b48Spatrick   //   merged into:
220509467b48Spatrick   // ldr x1, [x0, #64]!
220609467b48Spatrick   Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
220709467b48Spatrick   if (Update != E) {
220809467b48Spatrick     // Merge the update into the ld/st.
220909467b48Spatrick     MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
221009467b48Spatrick     return true;
221109467b48Spatrick   }
221209467b48Spatrick 
221309467b48Spatrick   return false;
221409467b48Spatrick }
221509467b48Spatrick 
optimizeBlock(MachineBasicBlock & MBB,bool EnableNarrowZeroStOpt)221609467b48Spatrick bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
221709467b48Spatrick                                         bool EnableNarrowZeroStOpt) {
221809467b48Spatrick 
221909467b48Spatrick   bool Modified = false;
222009467b48Spatrick   // Four tranformations to do here:
222109467b48Spatrick   // 1) Find loads that directly read from stores and promote them by
222209467b48Spatrick   //    replacing with mov instructions. If the store is wider than the load,
222309467b48Spatrick   //    the load will be replaced with a bitfield extract.
222409467b48Spatrick   //      e.g.,
222509467b48Spatrick   //        str w1, [x0, #4]
222609467b48Spatrick   //        ldrh w2, [x0, #6]
222709467b48Spatrick   //        ; becomes
222809467b48Spatrick   //        str w1, [x0, #4]
222909467b48Spatrick   //        lsr w2, w1, #16
223009467b48Spatrick   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
223109467b48Spatrick        MBBI != E;) {
223209467b48Spatrick     if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
223309467b48Spatrick       Modified = true;
223409467b48Spatrick     else
223509467b48Spatrick       ++MBBI;
223609467b48Spatrick   }
223709467b48Spatrick   // 2) Merge adjacent zero stores into a wider store.
223809467b48Spatrick   //      e.g.,
223909467b48Spatrick   //        strh wzr, [x0]
224009467b48Spatrick   //        strh wzr, [x0, #2]
224109467b48Spatrick   //        ; becomes
224209467b48Spatrick   //        str wzr, [x0]
224309467b48Spatrick   //      e.g.,
224409467b48Spatrick   //        str wzr, [x0]
224509467b48Spatrick   //        str wzr, [x0, #4]
224609467b48Spatrick   //        ; becomes
224709467b48Spatrick   //        str xzr, [x0]
224809467b48Spatrick   if (EnableNarrowZeroStOpt)
224909467b48Spatrick     for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
225009467b48Spatrick          MBBI != E;) {
225109467b48Spatrick       if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
225209467b48Spatrick         Modified = true;
225309467b48Spatrick       else
225409467b48Spatrick         ++MBBI;
225509467b48Spatrick     }
225609467b48Spatrick   // 3) Find loads and stores that can be merged into a single load or store
225709467b48Spatrick   //    pair instruction.
225809467b48Spatrick   //      e.g.,
225909467b48Spatrick   //        ldr x0, [x2]
226009467b48Spatrick   //        ldr x1, [x2, #8]
226109467b48Spatrick   //        ; becomes
226209467b48Spatrick   //        ldp x0, x1, [x2]
226309467b48Spatrick 
226409467b48Spatrick   if (MBB.getParent()->getRegInfo().tracksLiveness()) {
226509467b48Spatrick     DefinedInBB.clear();
226609467b48Spatrick     DefinedInBB.addLiveIns(MBB);
226709467b48Spatrick   }
226809467b48Spatrick 
226909467b48Spatrick   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
227009467b48Spatrick        MBBI != E;) {
227109467b48Spatrick     // Track currently live registers up to this point, to help with
227209467b48Spatrick     // searching for a rename register on demand.
227309467b48Spatrick     updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
227409467b48Spatrick     if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
227509467b48Spatrick       Modified = true;
227609467b48Spatrick     else
227709467b48Spatrick       ++MBBI;
227809467b48Spatrick   }
227909467b48Spatrick   // 4) Find base register updates that can be merged into the load or store
228009467b48Spatrick   //    as a base-reg writeback.
228109467b48Spatrick   //      e.g.,
228209467b48Spatrick   //        ldr x0, [x2]
228309467b48Spatrick   //        add x2, x2, #4
228409467b48Spatrick   //        ; becomes
228509467b48Spatrick   //        ldr x0, [x2], #4
228609467b48Spatrick   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
228709467b48Spatrick        MBBI != E;) {
228809467b48Spatrick     if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
228909467b48Spatrick       Modified = true;
229009467b48Spatrick     else
229109467b48Spatrick       ++MBBI;
229209467b48Spatrick   }
229309467b48Spatrick 
229409467b48Spatrick   return Modified;
229509467b48Spatrick }
229609467b48Spatrick 
runOnMachineFunction(MachineFunction & Fn)229709467b48Spatrick bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
229809467b48Spatrick   if (skipFunction(Fn.getFunction()))
229909467b48Spatrick     return false;
230009467b48Spatrick 
2301*d415bd75Srobert   Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
230209467b48Spatrick   TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
230309467b48Spatrick   TRI = Subtarget->getRegisterInfo();
230409467b48Spatrick   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
230509467b48Spatrick 
230609467b48Spatrick   // Resize the modified and used register unit trackers.  We do this once
230709467b48Spatrick   // per function and then clear the register units each time we optimize a load
230809467b48Spatrick   // or store.
230909467b48Spatrick   ModifiedRegUnits.init(*TRI);
231009467b48Spatrick   UsedRegUnits.init(*TRI);
231109467b48Spatrick   DefinedInBB.init(*TRI);
231209467b48Spatrick 
231309467b48Spatrick   bool Modified = false;
231409467b48Spatrick   bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
231509467b48Spatrick   for (auto &MBB : Fn) {
231609467b48Spatrick     auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
231709467b48Spatrick     Modified |= M;
231809467b48Spatrick   }
231909467b48Spatrick 
232009467b48Spatrick   return Modified;
232109467b48Spatrick }
232209467b48Spatrick 
// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
// stores near one another?  Note: The pre-RA instruction scheduler already has
// hooks to try and schedule pairable loads/stores together to improve pairing
// opportunities.  Thus, a pre-RA pairing pass may not be worth the effort.

// FIXME: When pairing store instructions it's very possible for this pass to
// hoist a store with a KILL marker above another use (without a KILL marker).
// The resulting IR is invalid, but nothing uses the KILL markers after this
// pass, so it's never caused a problem in practice.
233209467b48Spatrick 
233309467b48Spatrick /// createAArch64LoadStoreOptimizationPass - returns an instance of the
233409467b48Spatrick /// load / store optimization pass.
createAArch64LoadStoreOptimizationPass()233509467b48Spatrick FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
233609467b48Spatrick   return new AArch64LoadStoreOpt();
233709467b48Spatrick }
2338