xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
/// \file
/// For Falkor, we want to avoid HW prefetcher instruction tag collisions
/// that may inhibit the HW prefetching.  This is done in two steps.  Before
/// ISel, we mark strided loads (i.e. those that will likely benefit from
/// prefetching) with metadata.  Then, after opcodes have been finalized, we
/// insert MOVs and re-write loads to prevent unintentional tag collisions.
//
//===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AArch64.h"
160b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
170b57cec5SDimitry Andric #include "AArch64Subtarget.h"
180b57cec5SDimitry Andric #include "AArch64TargetMachine.h"
190b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
200b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
210b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
220b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
350b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
360b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
370b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
380b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
390b57cec5SDimitry Andric #include "llvm/IR/Function.h"
400b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
410b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
420b57cec5SDimitry Andric #include "llvm/IR/Metadata.h"
43480093f4SDimitry Andric #include "llvm/InitializePasses.h"
440b57cec5SDimitry Andric #include "llvm/Pass.h"
450b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
460b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
470b57cec5SDimitry Andric #include "llvm/Support/DebugCounter.h"
480b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
490b57cec5SDimitry Andric #include <iterator>
500b57cec5SDimitry Andric #include <utility>
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric using namespace llvm;
530b57cec5SDimitry Andric 
54e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-falkor-hwpf-fix"
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
570b57cec5SDimitry Andric STATISTIC(NumCollisionsAvoided,
580b57cec5SDimitry Andric           "Number of HW prefetch tag collisions avoided");
590b57cec5SDimitry Andric STATISTIC(NumCollisionsNotAvoided,
600b57cec5SDimitry Andric           "Number of HW prefetch tag collisions not avoided due to lack of registers");
610b57cec5SDimitry Andric DEBUG_COUNTER(FixCounter, "falkor-hwpf",
620b57cec5SDimitry Andric               "Controls which tag collisions are avoided");
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric namespace {
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric class FalkorMarkStridedAccesses {
670b57cec5SDimitry Andric public:
680b57cec5SDimitry Andric   FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
690b57cec5SDimitry Andric       : LI(LI), SE(SE) {}
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   bool run();
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric private:
740b57cec5SDimitry Andric   bool runOnLoop(Loop &L);
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric   LoopInfo &LI;
770b57cec5SDimitry Andric   ScalarEvolution &SE;
780b57cec5SDimitry Andric };
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric class FalkorMarkStridedAccessesLegacy : public FunctionPass {
810b57cec5SDimitry Andric public:
820b57cec5SDimitry Andric   static char ID; // Pass ID, replacement for typeid
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric   FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
850b57cec5SDimitry Andric     initializeFalkorMarkStridedAccessesLegacyPass(
860b57cec5SDimitry Andric         *PassRegistry::getPassRegistry());
870b57cec5SDimitry Andric   }
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
900b57cec5SDimitry Andric     AU.addRequired<TargetPassConfig>();
910b57cec5SDimitry Andric     AU.addPreserved<DominatorTreeWrapperPass>();
920b57cec5SDimitry Andric     AU.addRequired<LoopInfoWrapperPass>();
930b57cec5SDimitry Andric     AU.addPreserved<LoopInfoWrapperPass>();
940b57cec5SDimitry Andric     AU.addRequired<ScalarEvolutionWrapperPass>();
950b57cec5SDimitry Andric     AU.addPreserved<ScalarEvolutionWrapperPass>();
960b57cec5SDimitry Andric   }
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric   bool runOnFunction(Function &F) override;
990b57cec5SDimitry Andric };
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric } // end anonymous namespace
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric char FalkorMarkStridedAccessesLegacy::ID = 0;
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
1060b57cec5SDimitry Andric                       "Falkor HW Prefetch Fix", false, false)
1070b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1080b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1090b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1100b57cec5SDimitry Andric INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
1110b57cec5SDimitry Andric                     "Falkor HW Prefetch Fix", false, false)
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
1140b57cec5SDimitry Andric   return new FalkorMarkStridedAccessesLegacy();
1150b57cec5SDimitry Andric }
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
1180b57cec5SDimitry Andric   TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
1190b57cec5SDimitry Andric   const AArch64Subtarget *ST =
1200b57cec5SDimitry Andric       TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
1210b57cec5SDimitry Andric   if (ST->getProcFamily() != AArch64Subtarget::Falkor)
1220b57cec5SDimitry Andric     return false;
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric   if (skipFunction(F))
1250b57cec5SDimitry Andric     return false;
1260b57cec5SDimitry Andric 
1270b57cec5SDimitry Andric   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1280b57cec5SDimitry Andric   ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric   FalkorMarkStridedAccesses LDP(LI, SE);
1310b57cec5SDimitry Andric   return LDP.run();
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric bool FalkorMarkStridedAccesses::run() {
1350b57cec5SDimitry Andric   bool MadeChange = false;
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric   for (Loop *L : LI)
1380eae32dcSDimitry Andric     for (Loop *LIt : depth_first(L))
1390eae32dcSDimitry Andric       MadeChange |= runOnLoop(*LIt);
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric   return MadeChange;
1420b57cec5SDimitry Andric }
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
1450b57cec5SDimitry Andric   // Only mark strided loads in the inner-most loop
146e8d8bef9SDimitry Andric   if (!L.isInnermost())
1470b57cec5SDimitry Andric     return false;
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   bool MadeChange = false;
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric   for (BasicBlock *BB : L.blocks()) {
1520b57cec5SDimitry Andric     for (Instruction &I : *BB) {
1530b57cec5SDimitry Andric       LoadInst *LoadI = dyn_cast<LoadInst>(&I);
1540b57cec5SDimitry Andric       if (!LoadI)
1550b57cec5SDimitry Andric         continue;
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric       Value *PtrValue = LoadI->getPointerOperand();
1580b57cec5SDimitry Andric       if (L.isLoopInvariant(PtrValue))
1590b57cec5SDimitry Andric         continue;
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric       const SCEV *LSCEV = SE.getSCEV(PtrValue);
1620b57cec5SDimitry Andric       const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
1630b57cec5SDimitry Andric       if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
1640b57cec5SDimitry Andric         continue;
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric       LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD,
1670b57cec5SDimitry Andric                          MDNode::get(LoadI->getContext(), {}));
1680b57cec5SDimitry Andric       ++NumStridedLoadsMarked;
1690b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
1700b57cec5SDimitry Andric       MadeChange = true;
1710b57cec5SDimitry Andric     }
1720b57cec5SDimitry Andric   }
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric   return MadeChange;
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric namespace {
1780b57cec5SDimitry Andric 
1790b57cec5SDimitry Andric class FalkorHWPFFix : public MachineFunctionPass {
1800b57cec5SDimitry Andric public:
1810b57cec5SDimitry Andric   static char ID;
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric   FalkorHWPFFix() : MachineFunctionPass(ID) {
1840b57cec5SDimitry Andric     initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry());
1850b57cec5SDimitry Andric   }
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &Fn) override;
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
1900b57cec5SDimitry Andric     AU.setPreservesCFG();
191*0fca6ea1SDimitry Andric     AU.addRequired<MachineLoopInfoWrapperPass>();
1920b57cec5SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
1930b57cec5SDimitry Andric   }
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
1960b57cec5SDimitry Andric     return MachineFunctionProperties().set(
1970b57cec5SDimitry Andric         MachineFunctionProperties::Property::NoVRegs);
1980b57cec5SDimitry Andric   }
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric private:
2010b57cec5SDimitry Andric   void runOnLoop(MachineLoop &L, MachineFunction &Fn);
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric   const AArch64InstrInfo *TII;
2040b57cec5SDimitry Andric   const TargetRegisterInfo *TRI;
2050b57cec5SDimitry Andric   DenseMap<unsigned, SmallVector<MachineInstr *, 4>> TagMap;
2060b57cec5SDimitry Andric   bool Modified;
2070b57cec5SDimitry Andric };
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric /// Bits from load opcodes used to compute HW prefetcher instruction tags.
2100b57cec5SDimitry Andric struct LoadInfo {
2110b57cec5SDimitry Andric   LoadInfo() = default;
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric   Register DestReg;
2140b57cec5SDimitry Andric   Register BaseReg;
2150b57cec5SDimitry Andric   int BaseRegIdx = -1;
2160b57cec5SDimitry Andric   const MachineOperand *OffsetOpnd = nullptr;
2170b57cec5SDimitry Andric   bool IsPrePost = false;
2180b57cec5SDimitry Andric };
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric } // end anonymous namespace
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric char FalkorHWPFFix::ID = 0;
2230b57cec5SDimitry Andric 
224e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
2250b57cec5SDimitry Andric                       "Falkor HW Prefetch Fix Late Phase", false, false)
226*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
227e8d8bef9SDimitry Andric INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
2280b57cec5SDimitry Andric                     "Falkor HW Prefetch Fix Late Phase", false, false)
2290b57cec5SDimitry Andric 
/// Packs three values into a 14-bit tag:
///   bits 0-3  : low 4 bits of \p Dest
///   bits 4-7  : low 4 bits of \p Base
///   bits 8-13 : low 6 bits of \p Offset
/// Higher bits of each argument are discarded.
static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
  const unsigned DestBits = Dest & 0xf;
  const unsigned BaseBits = (Base & 0xf) << 4;
  const unsigned OffsetBits = (Offset & 0x3f) << 8;
  return OffsetBits | BaseBits | DestBits;
}
2330b57cec5SDimitry Andric 
234bdd1243dSDimitry Andric static std::optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
2350b57cec5SDimitry Andric   int DestRegIdx;
2360b57cec5SDimitry Andric   int BaseRegIdx;
2370b57cec5SDimitry Andric   int OffsetIdx;
2380b57cec5SDimitry Andric   bool IsPrePost;
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   switch (MI.getOpcode()) {
2410b57cec5SDimitry Andric   default:
242bdd1243dSDimitry Andric     return std::nullopt;
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric   case AArch64::LD1i64:
2450b57cec5SDimitry Andric   case AArch64::LD2i64:
2460b57cec5SDimitry Andric     DestRegIdx = 0;
2470b57cec5SDimitry Andric     BaseRegIdx = 3;
2480b57cec5SDimitry Andric     OffsetIdx = -1;
2490b57cec5SDimitry Andric     IsPrePost = false;
2500b57cec5SDimitry Andric     break;
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric   case AArch64::LD1i8:
2530b57cec5SDimitry Andric   case AArch64::LD1i16:
2540b57cec5SDimitry Andric   case AArch64::LD1i32:
2550b57cec5SDimitry Andric   case AArch64::LD2i8:
2560b57cec5SDimitry Andric   case AArch64::LD2i16:
2570b57cec5SDimitry Andric   case AArch64::LD2i32:
2580b57cec5SDimitry Andric   case AArch64::LD3i8:
2590b57cec5SDimitry Andric   case AArch64::LD3i16:
2600b57cec5SDimitry Andric   case AArch64::LD3i32:
2610b57cec5SDimitry Andric   case AArch64::LD3i64:
2620b57cec5SDimitry Andric   case AArch64::LD4i8:
2630b57cec5SDimitry Andric   case AArch64::LD4i16:
2640b57cec5SDimitry Andric   case AArch64::LD4i32:
2650b57cec5SDimitry Andric   case AArch64::LD4i64:
2660b57cec5SDimitry Andric     DestRegIdx = -1;
2670b57cec5SDimitry Andric     BaseRegIdx = 3;
2680b57cec5SDimitry Andric     OffsetIdx = -1;
2690b57cec5SDimitry Andric     IsPrePost = false;
2700b57cec5SDimitry Andric     break;
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric   case AArch64::LD1Onev1d:
2730b57cec5SDimitry Andric   case AArch64::LD1Onev2s:
2740b57cec5SDimitry Andric   case AArch64::LD1Onev4h:
2750b57cec5SDimitry Andric   case AArch64::LD1Onev8b:
2760b57cec5SDimitry Andric   case AArch64::LD1Onev2d:
2770b57cec5SDimitry Andric   case AArch64::LD1Onev4s:
2780b57cec5SDimitry Andric   case AArch64::LD1Onev8h:
2790b57cec5SDimitry Andric   case AArch64::LD1Onev16b:
2800b57cec5SDimitry Andric   case AArch64::LD1Rv1d:
2810b57cec5SDimitry Andric   case AArch64::LD1Rv2s:
2820b57cec5SDimitry Andric   case AArch64::LD1Rv4h:
2830b57cec5SDimitry Andric   case AArch64::LD1Rv8b:
2840b57cec5SDimitry Andric   case AArch64::LD1Rv2d:
2850b57cec5SDimitry Andric   case AArch64::LD1Rv4s:
2860b57cec5SDimitry Andric   case AArch64::LD1Rv8h:
2870b57cec5SDimitry Andric   case AArch64::LD1Rv16b:
2880b57cec5SDimitry Andric     DestRegIdx = 0;
2890b57cec5SDimitry Andric     BaseRegIdx = 1;
2900b57cec5SDimitry Andric     OffsetIdx = -1;
2910b57cec5SDimitry Andric     IsPrePost = false;
2920b57cec5SDimitry Andric     break;
2930b57cec5SDimitry Andric 
2940b57cec5SDimitry Andric   case AArch64::LD1Twov1d:
2950b57cec5SDimitry Andric   case AArch64::LD1Twov2s:
2960b57cec5SDimitry Andric   case AArch64::LD1Twov4h:
2970b57cec5SDimitry Andric   case AArch64::LD1Twov8b:
2980b57cec5SDimitry Andric   case AArch64::LD1Twov2d:
2990b57cec5SDimitry Andric   case AArch64::LD1Twov4s:
3000b57cec5SDimitry Andric   case AArch64::LD1Twov8h:
3010b57cec5SDimitry Andric   case AArch64::LD1Twov16b:
3020b57cec5SDimitry Andric   case AArch64::LD1Threev1d:
3030b57cec5SDimitry Andric   case AArch64::LD1Threev2s:
3040b57cec5SDimitry Andric   case AArch64::LD1Threev4h:
3050b57cec5SDimitry Andric   case AArch64::LD1Threev8b:
3060b57cec5SDimitry Andric   case AArch64::LD1Threev2d:
3070b57cec5SDimitry Andric   case AArch64::LD1Threev4s:
3080b57cec5SDimitry Andric   case AArch64::LD1Threev8h:
3090b57cec5SDimitry Andric   case AArch64::LD1Threev16b:
3100b57cec5SDimitry Andric   case AArch64::LD1Fourv1d:
3110b57cec5SDimitry Andric   case AArch64::LD1Fourv2s:
3120b57cec5SDimitry Andric   case AArch64::LD1Fourv4h:
3130b57cec5SDimitry Andric   case AArch64::LD1Fourv8b:
3140b57cec5SDimitry Andric   case AArch64::LD1Fourv2d:
3150b57cec5SDimitry Andric   case AArch64::LD1Fourv4s:
3160b57cec5SDimitry Andric   case AArch64::LD1Fourv8h:
3170b57cec5SDimitry Andric   case AArch64::LD1Fourv16b:
3180b57cec5SDimitry Andric   case AArch64::LD2Twov2s:
3190b57cec5SDimitry Andric   case AArch64::LD2Twov4s:
3200b57cec5SDimitry Andric   case AArch64::LD2Twov8b:
3210b57cec5SDimitry Andric   case AArch64::LD2Twov2d:
3220b57cec5SDimitry Andric   case AArch64::LD2Twov4h:
3230b57cec5SDimitry Andric   case AArch64::LD2Twov8h:
3240b57cec5SDimitry Andric   case AArch64::LD2Twov16b:
3250b57cec5SDimitry Andric   case AArch64::LD2Rv1d:
3260b57cec5SDimitry Andric   case AArch64::LD2Rv2s:
3270b57cec5SDimitry Andric   case AArch64::LD2Rv4s:
3280b57cec5SDimitry Andric   case AArch64::LD2Rv8b:
3290b57cec5SDimitry Andric   case AArch64::LD2Rv2d:
3300b57cec5SDimitry Andric   case AArch64::LD2Rv4h:
3310b57cec5SDimitry Andric   case AArch64::LD2Rv8h:
3320b57cec5SDimitry Andric   case AArch64::LD2Rv16b:
3330b57cec5SDimitry Andric   case AArch64::LD3Threev2s:
3340b57cec5SDimitry Andric   case AArch64::LD3Threev4h:
3350b57cec5SDimitry Andric   case AArch64::LD3Threev8b:
3360b57cec5SDimitry Andric   case AArch64::LD3Threev2d:
3370b57cec5SDimitry Andric   case AArch64::LD3Threev4s:
3380b57cec5SDimitry Andric   case AArch64::LD3Threev8h:
3390b57cec5SDimitry Andric   case AArch64::LD3Threev16b:
3400b57cec5SDimitry Andric   case AArch64::LD3Rv1d:
3410b57cec5SDimitry Andric   case AArch64::LD3Rv2s:
3420b57cec5SDimitry Andric   case AArch64::LD3Rv4h:
3430b57cec5SDimitry Andric   case AArch64::LD3Rv8b:
3440b57cec5SDimitry Andric   case AArch64::LD3Rv2d:
3450b57cec5SDimitry Andric   case AArch64::LD3Rv4s:
3460b57cec5SDimitry Andric   case AArch64::LD3Rv8h:
3470b57cec5SDimitry Andric   case AArch64::LD3Rv16b:
3480b57cec5SDimitry Andric   case AArch64::LD4Fourv2s:
3490b57cec5SDimitry Andric   case AArch64::LD4Fourv4h:
3500b57cec5SDimitry Andric   case AArch64::LD4Fourv8b:
3510b57cec5SDimitry Andric   case AArch64::LD4Fourv2d:
3520b57cec5SDimitry Andric   case AArch64::LD4Fourv4s:
3530b57cec5SDimitry Andric   case AArch64::LD4Fourv8h:
3540b57cec5SDimitry Andric   case AArch64::LD4Fourv16b:
3550b57cec5SDimitry Andric   case AArch64::LD4Rv1d:
3560b57cec5SDimitry Andric   case AArch64::LD4Rv2s:
3570b57cec5SDimitry Andric   case AArch64::LD4Rv4h:
3580b57cec5SDimitry Andric   case AArch64::LD4Rv8b:
3590b57cec5SDimitry Andric   case AArch64::LD4Rv2d:
3600b57cec5SDimitry Andric   case AArch64::LD4Rv4s:
3610b57cec5SDimitry Andric   case AArch64::LD4Rv8h:
3620b57cec5SDimitry Andric   case AArch64::LD4Rv16b:
3630b57cec5SDimitry Andric     DestRegIdx = -1;
3640b57cec5SDimitry Andric     BaseRegIdx = 1;
3650b57cec5SDimitry Andric     OffsetIdx = -1;
3660b57cec5SDimitry Andric     IsPrePost = false;
3670b57cec5SDimitry Andric     break;
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric   case AArch64::LD1i64_POST:
3700b57cec5SDimitry Andric   case AArch64::LD2i64_POST:
3710b57cec5SDimitry Andric     DestRegIdx = 1;
3720b57cec5SDimitry Andric     BaseRegIdx = 4;
3730b57cec5SDimitry Andric     OffsetIdx = 5;
3740b57cec5SDimitry Andric     IsPrePost = true;
3750b57cec5SDimitry Andric     break;
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric   case AArch64::LD1i8_POST:
3780b57cec5SDimitry Andric   case AArch64::LD1i16_POST:
3790b57cec5SDimitry Andric   case AArch64::LD1i32_POST:
3800b57cec5SDimitry Andric   case AArch64::LD2i8_POST:
3810b57cec5SDimitry Andric   case AArch64::LD2i16_POST:
3820b57cec5SDimitry Andric   case AArch64::LD2i32_POST:
3830b57cec5SDimitry Andric   case AArch64::LD3i8_POST:
3840b57cec5SDimitry Andric   case AArch64::LD3i16_POST:
3850b57cec5SDimitry Andric   case AArch64::LD3i32_POST:
3860b57cec5SDimitry Andric   case AArch64::LD3i64_POST:
3870b57cec5SDimitry Andric   case AArch64::LD4i8_POST:
3880b57cec5SDimitry Andric   case AArch64::LD4i16_POST:
3890b57cec5SDimitry Andric   case AArch64::LD4i32_POST:
3900b57cec5SDimitry Andric   case AArch64::LD4i64_POST:
3910b57cec5SDimitry Andric     DestRegIdx = -1;
3920b57cec5SDimitry Andric     BaseRegIdx = 4;
3930b57cec5SDimitry Andric     OffsetIdx = 5;
3940b57cec5SDimitry Andric     IsPrePost = true;
3950b57cec5SDimitry Andric     break;
3960b57cec5SDimitry Andric 
3970b57cec5SDimitry Andric   case AArch64::LD1Onev1d_POST:
3980b57cec5SDimitry Andric   case AArch64::LD1Onev2s_POST:
3990b57cec5SDimitry Andric   case AArch64::LD1Onev4h_POST:
4000b57cec5SDimitry Andric   case AArch64::LD1Onev8b_POST:
4010b57cec5SDimitry Andric   case AArch64::LD1Onev2d_POST:
4020b57cec5SDimitry Andric   case AArch64::LD1Onev4s_POST:
4030b57cec5SDimitry Andric   case AArch64::LD1Onev8h_POST:
4040b57cec5SDimitry Andric   case AArch64::LD1Onev16b_POST:
4050b57cec5SDimitry Andric   case AArch64::LD1Rv1d_POST:
4060b57cec5SDimitry Andric   case AArch64::LD1Rv2s_POST:
4070b57cec5SDimitry Andric   case AArch64::LD1Rv4h_POST:
4080b57cec5SDimitry Andric   case AArch64::LD1Rv8b_POST:
4090b57cec5SDimitry Andric   case AArch64::LD1Rv2d_POST:
4100b57cec5SDimitry Andric   case AArch64::LD1Rv4s_POST:
4110b57cec5SDimitry Andric   case AArch64::LD1Rv8h_POST:
4120b57cec5SDimitry Andric   case AArch64::LD1Rv16b_POST:
4130b57cec5SDimitry Andric     DestRegIdx = 1;
4140b57cec5SDimitry Andric     BaseRegIdx = 2;
4150b57cec5SDimitry Andric     OffsetIdx = 3;
4160b57cec5SDimitry Andric     IsPrePost = true;
4170b57cec5SDimitry Andric     break;
4180b57cec5SDimitry Andric 
4190b57cec5SDimitry Andric   case AArch64::LD1Twov1d_POST:
4200b57cec5SDimitry Andric   case AArch64::LD1Twov2s_POST:
4210b57cec5SDimitry Andric   case AArch64::LD1Twov4h_POST:
4220b57cec5SDimitry Andric   case AArch64::LD1Twov8b_POST:
4230b57cec5SDimitry Andric   case AArch64::LD1Twov2d_POST:
4240b57cec5SDimitry Andric   case AArch64::LD1Twov4s_POST:
4250b57cec5SDimitry Andric   case AArch64::LD1Twov8h_POST:
4260b57cec5SDimitry Andric   case AArch64::LD1Twov16b_POST:
4270b57cec5SDimitry Andric   case AArch64::LD1Threev1d_POST:
4280b57cec5SDimitry Andric   case AArch64::LD1Threev2s_POST:
4290b57cec5SDimitry Andric   case AArch64::LD1Threev4h_POST:
4300b57cec5SDimitry Andric   case AArch64::LD1Threev8b_POST:
4310b57cec5SDimitry Andric   case AArch64::LD1Threev2d_POST:
4320b57cec5SDimitry Andric   case AArch64::LD1Threev4s_POST:
4330b57cec5SDimitry Andric   case AArch64::LD1Threev8h_POST:
4340b57cec5SDimitry Andric   case AArch64::LD1Threev16b_POST:
4350b57cec5SDimitry Andric   case AArch64::LD1Fourv1d_POST:
4360b57cec5SDimitry Andric   case AArch64::LD1Fourv2s_POST:
4370b57cec5SDimitry Andric   case AArch64::LD1Fourv4h_POST:
4380b57cec5SDimitry Andric   case AArch64::LD1Fourv8b_POST:
4390b57cec5SDimitry Andric   case AArch64::LD1Fourv2d_POST:
4400b57cec5SDimitry Andric   case AArch64::LD1Fourv4s_POST:
4410b57cec5SDimitry Andric   case AArch64::LD1Fourv8h_POST:
4420b57cec5SDimitry Andric   case AArch64::LD1Fourv16b_POST:
4430b57cec5SDimitry Andric   case AArch64::LD2Twov2s_POST:
4440b57cec5SDimitry Andric   case AArch64::LD2Twov4s_POST:
4450b57cec5SDimitry Andric   case AArch64::LD2Twov8b_POST:
4460b57cec5SDimitry Andric   case AArch64::LD2Twov2d_POST:
4470b57cec5SDimitry Andric   case AArch64::LD2Twov4h_POST:
4480b57cec5SDimitry Andric   case AArch64::LD2Twov8h_POST:
4490b57cec5SDimitry Andric   case AArch64::LD2Twov16b_POST:
4500b57cec5SDimitry Andric   case AArch64::LD2Rv1d_POST:
4510b57cec5SDimitry Andric   case AArch64::LD2Rv2s_POST:
4520b57cec5SDimitry Andric   case AArch64::LD2Rv4s_POST:
4530b57cec5SDimitry Andric   case AArch64::LD2Rv8b_POST:
4540b57cec5SDimitry Andric   case AArch64::LD2Rv2d_POST:
4550b57cec5SDimitry Andric   case AArch64::LD2Rv4h_POST:
4560b57cec5SDimitry Andric   case AArch64::LD2Rv8h_POST:
4570b57cec5SDimitry Andric   case AArch64::LD2Rv16b_POST:
4580b57cec5SDimitry Andric   case AArch64::LD3Threev2s_POST:
4590b57cec5SDimitry Andric   case AArch64::LD3Threev4h_POST:
4600b57cec5SDimitry Andric   case AArch64::LD3Threev8b_POST:
4610b57cec5SDimitry Andric   case AArch64::LD3Threev2d_POST:
4620b57cec5SDimitry Andric   case AArch64::LD3Threev4s_POST:
4630b57cec5SDimitry Andric   case AArch64::LD3Threev8h_POST:
4640b57cec5SDimitry Andric   case AArch64::LD3Threev16b_POST:
4650b57cec5SDimitry Andric   case AArch64::LD3Rv1d_POST:
4660b57cec5SDimitry Andric   case AArch64::LD3Rv2s_POST:
4670b57cec5SDimitry Andric   case AArch64::LD3Rv4h_POST:
4680b57cec5SDimitry Andric   case AArch64::LD3Rv8b_POST:
4690b57cec5SDimitry Andric   case AArch64::LD3Rv2d_POST:
4700b57cec5SDimitry Andric   case AArch64::LD3Rv4s_POST:
4710b57cec5SDimitry Andric   case AArch64::LD3Rv8h_POST:
4720b57cec5SDimitry Andric   case AArch64::LD3Rv16b_POST:
4730b57cec5SDimitry Andric   case AArch64::LD4Fourv2s_POST:
4740b57cec5SDimitry Andric   case AArch64::LD4Fourv4h_POST:
4750b57cec5SDimitry Andric   case AArch64::LD4Fourv8b_POST:
4760b57cec5SDimitry Andric   case AArch64::LD4Fourv2d_POST:
4770b57cec5SDimitry Andric   case AArch64::LD4Fourv4s_POST:
4780b57cec5SDimitry Andric   case AArch64::LD4Fourv8h_POST:
4790b57cec5SDimitry Andric   case AArch64::LD4Fourv16b_POST:
4800b57cec5SDimitry Andric   case AArch64::LD4Rv1d_POST:
4810b57cec5SDimitry Andric   case AArch64::LD4Rv2s_POST:
4820b57cec5SDimitry Andric   case AArch64::LD4Rv4h_POST:
4830b57cec5SDimitry Andric   case AArch64::LD4Rv8b_POST:
4840b57cec5SDimitry Andric   case AArch64::LD4Rv2d_POST:
4850b57cec5SDimitry Andric   case AArch64::LD4Rv4s_POST:
4860b57cec5SDimitry Andric   case AArch64::LD4Rv8h_POST:
4870b57cec5SDimitry Andric   case AArch64::LD4Rv16b_POST:
4880b57cec5SDimitry Andric     DestRegIdx = -1;
4890b57cec5SDimitry Andric     BaseRegIdx = 2;
4900b57cec5SDimitry Andric     OffsetIdx = 3;
4910b57cec5SDimitry Andric     IsPrePost = true;
4920b57cec5SDimitry Andric     break;
4930b57cec5SDimitry Andric 
4940b57cec5SDimitry Andric   case AArch64::LDRBBroW:
4950b57cec5SDimitry Andric   case AArch64::LDRBBroX:
4960b57cec5SDimitry Andric   case AArch64::LDRBBui:
4970b57cec5SDimitry Andric   case AArch64::LDRBroW:
4980b57cec5SDimitry Andric   case AArch64::LDRBroX:
4990b57cec5SDimitry Andric   case AArch64::LDRBui:
5000b57cec5SDimitry Andric   case AArch64::LDRDl:
5010b57cec5SDimitry Andric   case AArch64::LDRDroW:
5020b57cec5SDimitry Andric   case AArch64::LDRDroX:
5030b57cec5SDimitry Andric   case AArch64::LDRDui:
5040b57cec5SDimitry Andric   case AArch64::LDRHHroW:
5050b57cec5SDimitry Andric   case AArch64::LDRHHroX:
5060b57cec5SDimitry Andric   case AArch64::LDRHHui:
5070b57cec5SDimitry Andric   case AArch64::LDRHroW:
5080b57cec5SDimitry Andric   case AArch64::LDRHroX:
5090b57cec5SDimitry Andric   case AArch64::LDRHui:
5100b57cec5SDimitry Andric   case AArch64::LDRQl:
5110b57cec5SDimitry Andric   case AArch64::LDRQroW:
5120b57cec5SDimitry Andric   case AArch64::LDRQroX:
5130b57cec5SDimitry Andric   case AArch64::LDRQui:
5140b57cec5SDimitry Andric   case AArch64::LDRSBWroW:
5150b57cec5SDimitry Andric   case AArch64::LDRSBWroX:
5160b57cec5SDimitry Andric   case AArch64::LDRSBWui:
5170b57cec5SDimitry Andric   case AArch64::LDRSBXroW:
5180b57cec5SDimitry Andric   case AArch64::LDRSBXroX:
5190b57cec5SDimitry Andric   case AArch64::LDRSBXui:
5200b57cec5SDimitry Andric   case AArch64::LDRSHWroW:
5210b57cec5SDimitry Andric   case AArch64::LDRSHWroX:
5220b57cec5SDimitry Andric   case AArch64::LDRSHWui:
5230b57cec5SDimitry Andric   case AArch64::LDRSHXroW:
5240b57cec5SDimitry Andric   case AArch64::LDRSHXroX:
5250b57cec5SDimitry Andric   case AArch64::LDRSHXui:
5260b57cec5SDimitry Andric   case AArch64::LDRSWl:
5270b57cec5SDimitry Andric   case AArch64::LDRSWroW:
5280b57cec5SDimitry Andric   case AArch64::LDRSWroX:
5290b57cec5SDimitry Andric   case AArch64::LDRSWui:
5300b57cec5SDimitry Andric   case AArch64::LDRSl:
5310b57cec5SDimitry Andric   case AArch64::LDRSroW:
5320b57cec5SDimitry Andric   case AArch64::LDRSroX:
5330b57cec5SDimitry Andric   case AArch64::LDRSui:
5340b57cec5SDimitry Andric   case AArch64::LDRWl:
5350b57cec5SDimitry Andric   case AArch64::LDRWroW:
5360b57cec5SDimitry Andric   case AArch64::LDRWroX:
5370b57cec5SDimitry Andric   case AArch64::LDRWui:
5380b57cec5SDimitry Andric   case AArch64::LDRXl:
5390b57cec5SDimitry Andric   case AArch64::LDRXroW:
5400b57cec5SDimitry Andric   case AArch64::LDRXroX:
5410b57cec5SDimitry Andric   case AArch64::LDRXui:
5420b57cec5SDimitry Andric   case AArch64::LDURBBi:
5430b57cec5SDimitry Andric   case AArch64::LDURBi:
5440b57cec5SDimitry Andric   case AArch64::LDURDi:
5450b57cec5SDimitry Andric   case AArch64::LDURHHi:
5460b57cec5SDimitry Andric   case AArch64::LDURHi:
5470b57cec5SDimitry Andric   case AArch64::LDURQi:
5480b57cec5SDimitry Andric   case AArch64::LDURSBWi:
5490b57cec5SDimitry Andric   case AArch64::LDURSBXi:
5500b57cec5SDimitry Andric   case AArch64::LDURSHWi:
5510b57cec5SDimitry Andric   case AArch64::LDURSHXi:
5520b57cec5SDimitry Andric   case AArch64::LDURSWi:
5530b57cec5SDimitry Andric   case AArch64::LDURSi:
5540b57cec5SDimitry Andric   case AArch64::LDURWi:
5550b57cec5SDimitry Andric   case AArch64::LDURXi:
5560b57cec5SDimitry Andric     DestRegIdx = 0;
5570b57cec5SDimitry Andric     BaseRegIdx = 1;
5580b57cec5SDimitry Andric     OffsetIdx = 2;
5590b57cec5SDimitry Andric     IsPrePost = false;
5600b57cec5SDimitry Andric     break;
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric   case AArch64::LDRBBpost:
5630b57cec5SDimitry Andric   case AArch64::LDRBBpre:
5640b57cec5SDimitry Andric   case AArch64::LDRBpost:
5650b57cec5SDimitry Andric   case AArch64::LDRBpre:
5660b57cec5SDimitry Andric   case AArch64::LDRDpost:
5670b57cec5SDimitry Andric   case AArch64::LDRDpre:
5680b57cec5SDimitry Andric   case AArch64::LDRHHpost:
5690b57cec5SDimitry Andric   case AArch64::LDRHHpre:
5700b57cec5SDimitry Andric   case AArch64::LDRHpost:
5710b57cec5SDimitry Andric   case AArch64::LDRHpre:
5720b57cec5SDimitry Andric   case AArch64::LDRQpost:
5730b57cec5SDimitry Andric   case AArch64::LDRQpre:
5740b57cec5SDimitry Andric   case AArch64::LDRSBWpost:
5750b57cec5SDimitry Andric   case AArch64::LDRSBWpre:
5760b57cec5SDimitry Andric   case AArch64::LDRSBXpost:
5770b57cec5SDimitry Andric   case AArch64::LDRSBXpre:
5780b57cec5SDimitry Andric   case AArch64::LDRSHWpost:
5790b57cec5SDimitry Andric   case AArch64::LDRSHWpre:
5800b57cec5SDimitry Andric   case AArch64::LDRSHXpost:
5810b57cec5SDimitry Andric   case AArch64::LDRSHXpre:
5820b57cec5SDimitry Andric   case AArch64::LDRSWpost:
5830b57cec5SDimitry Andric   case AArch64::LDRSWpre:
5840b57cec5SDimitry Andric   case AArch64::LDRSpost:
5850b57cec5SDimitry Andric   case AArch64::LDRSpre:
5860b57cec5SDimitry Andric   case AArch64::LDRWpost:
5870b57cec5SDimitry Andric   case AArch64::LDRWpre:
5880b57cec5SDimitry Andric   case AArch64::LDRXpost:
5890b57cec5SDimitry Andric   case AArch64::LDRXpre:
5900b57cec5SDimitry Andric     DestRegIdx = 1;
5910b57cec5SDimitry Andric     BaseRegIdx = 2;
5920b57cec5SDimitry Andric     OffsetIdx = 3;
5930b57cec5SDimitry Andric     IsPrePost = true;
5940b57cec5SDimitry Andric     break;
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric   case AArch64::LDNPDi:
5970b57cec5SDimitry Andric   case AArch64::LDNPQi:
5980b57cec5SDimitry Andric   case AArch64::LDNPSi:
5990b57cec5SDimitry Andric   case AArch64::LDPQi:
6000b57cec5SDimitry Andric   case AArch64::LDPDi:
6010b57cec5SDimitry Andric   case AArch64::LDPSi:
6020b57cec5SDimitry Andric     DestRegIdx = -1;
6030b57cec5SDimitry Andric     BaseRegIdx = 2;
6040b57cec5SDimitry Andric     OffsetIdx = 3;
6050b57cec5SDimitry Andric     IsPrePost = false;
6060b57cec5SDimitry Andric     break;
6070b57cec5SDimitry Andric 
6080b57cec5SDimitry Andric   case AArch64::LDPSWi:
6090b57cec5SDimitry Andric   case AArch64::LDPWi:
6100b57cec5SDimitry Andric   case AArch64::LDPXi:
6110b57cec5SDimitry Andric     DestRegIdx = 0;
6120b57cec5SDimitry Andric     BaseRegIdx = 2;
6130b57cec5SDimitry Andric     OffsetIdx = 3;
6140b57cec5SDimitry Andric     IsPrePost = false;
6150b57cec5SDimitry Andric     break;
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric   case AArch64::LDPQpost:
6180b57cec5SDimitry Andric   case AArch64::LDPQpre:
6190b57cec5SDimitry Andric   case AArch64::LDPDpost:
6200b57cec5SDimitry Andric   case AArch64::LDPDpre:
6210b57cec5SDimitry Andric   case AArch64::LDPSpost:
6220b57cec5SDimitry Andric   case AArch64::LDPSpre:
6230b57cec5SDimitry Andric     DestRegIdx = -1;
6240b57cec5SDimitry Andric     BaseRegIdx = 3;
6250b57cec5SDimitry Andric     OffsetIdx = 4;
6260b57cec5SDimitry Andric     IsPrePost = true;
6270b57cec5SDimitry Andric     break;
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric   case AArch64::LDPSWpost:
6300b57cec5SDimitry Andric   case AArch64::LDPSWpre:
6310b57cec5SDimitry Andric   case AArch64::LDPWpost:
6320b57cec5SDimitry Andric   case AArch64::LDPWpre:
6330b57cec5SDimitry Andric   case AArch64::LDPXpost:
6340b57cec5SDimitry Andric   case AArch64::LDPXpre:
6350b57cec5SDimitry Andric     DestRegIdx = 1;
6360b57cec5SDimitry Andric     BaseRegIdx = 3;
6370b57cec5SDimitry Andric     OffsetIdx = 4;
6380b57cec5SDimitry Andric     IsPrePost = true;
6390b57cec5SDimitry Andric     break;
6400b57cec5SDimitry Andric   }
6410b57cec5SDimitry Andric 
6420b57cec5SDimitry Andric   // Loads from the stack pointer don't get prefetched.
6438bcb0991SDimitry Andric   Register BaseReg = MI.getOperand(BaseRegIdx).getReg();
6440b57cec5SDimitry Andric   if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP)
645bdd1243dSDimitry Andric     return std::nullopt;
6460b57cec5SDimitry Andric 
6470b57cec5SDimitry Andric   LoadInfo LI;
6480b57cec5SDimitry Andric   LI.DestReg = DestRegIdx == -1 ? Register() : MI.getOperand(DestRegIdx).getReg();
6490b57cec5SDimitry Andric   LI.BaseReg = BaseReg;
6500b57cec5SDimitry Andric   LI.BaseRegIdx = BaseRegIdx;
6510b57cec5SDimitry Andric   LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
6520b57cec5SDimitry Andric   LI.IsPrePost = IsPrePost;
6530b57cec5SDimitry Andric   return LI;
6540b57cec5SDimitry Andric }
6550b57cec5SDimitry Andric 
656bdd1243dSDimitry Andric static std::optional<unsigned> getTag(const TargetRegisterInfo *TRI,
657bdd1243dSDimitry Andric                                       const MachineInstr &MI,
658bdd1243dSDimitry Andric                                       const LoadInfo &LI) {
6590b57cec5SDimitry Andric   unsigned Dest = LI.DestReg ? TRI->getEncodingValue(LI.DestReg) : 0;
6600b57cec5SDimitry Andric   unsigned Base = TRI->getEncodingValue(LI.BaseReg);
6610b57cec5SDimitry Andric   unsigned Off;
6620b57cec5SDimitry Andric   if (LI.OffsetOpnd == nullptr)
6630b57cec5SDimitry Andric     Off = 0;
6640b57cec5SDimitry Andric   else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() ||
6650b57cec5SDimitry Andric            LI.OffsetOpnd->isCPI())
666bdd1243dSDimitry Andric     return std::nullopt;
6670b57cec5SDimitry Andric   else if (LI.OffsetOpnd->isReg())
6680b57cec5SDimitry Andric     Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg());
6690b57cec5SDimitry Andric   else
6700b57cec5SDimitry Andric     Off = LI.OffsetOpnd->getImm() >> 2;
6710b57cec5SDimitry Andric 
6720b57cec5SDimitry Andric   return makeTag(Dest, Base, Off);
6730b57cec5SDimitry Andric }
6740b57cec5SDimitry Andric 
6750b57cec5SDimitry Andric void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
6760b57cec5SDimitry Andric   // Build the initial tag map for the whole loop.
6770b57cec5SDimitry Andric   TagMap.clear();
6780b57cec5SDimitry Andric   for (MachineBasicBlock *MBB : L.getBlocks())
6790b57cec5SDimitry Andric     for (MachineInstr &MI : *MBB) {
680bdd1243dSDimitry Andric       std::optional<LoadInfo> LInfo = getLoadInfo(MI);
6810b57cec5SDimitry Andric       if (!LInfo)
6820b57cec5SDimitry Andric         continue;
683bdd1243dSDimitry Andric       std::optional<unsigned> Tag = getTag(TRI, MI, *LInfo);
6840b57cec5SDimitry Andric       if (!Tag)
6850b57cec5SDimitry Andric         continue;
6860b57cec5SDimitry Andric       TagMap[*Tag].push_back(&MI);
6870b57cec5SDimitry Andric     }
6880b57cec5SDimitry Andric 
6890b57cec5SDimitry Andric   bool AnyCollisions = false;
6900b57cec5SDimitry Andric   for (auto &P : TagMap) {
6910b57cec5SDimitry Andric     auto Size = P.second.size();
6920b57cec5SDimitry Andric     if (Size > 1) {
6930b57cec5SDimitry Andric       for (auto *MI : P.second) {
6940b57cec5SDimitry Andric         if (TII->isStridedAccess(*MI)) {
6950b57cec5SDimitry Andric           AnyCollisions = true;
6960b57cec5SDimitry Andric           break;
6970b57cec5SDimitry Andric         }
6980b57cec5SDimitry Andric       }
6990b57cec5SDimitry Andric     }
7000b57cec5SDimitry Andric     if (AnyCollisions)
7010b57cec5SDimitry Andric       break;
7020b57cec5SDimitry Andric   }
7030b57cec5SDimitry Andric   // Nothing to fix.
7040b57cec5SDimitry Andric   if (!AnyCollisions)
7050b57cec5SDimitry Andric     return;
7060b57cec5SDimitry Andric 
7070b57cec5SDimitry Andric   MachineRegisterInfo &MRI = Fn.getRegInfo();
7080b57cec5SDimitry Andric 
7090b57cec5SDimitry Andric   // Go through all the basic blocks in the current loop and fix any streaming
7100b57cec5SDimitry Andric   // loads to avoid collisions with any other loads.
7110b57cec5SDimitry Andric   LiveRegUnits LR(*TRI);
7120b57cec5SDimitry Andric   for (MachineBasicBlock *MBB : L.getBlocks()) {
7130b57cec5SDimitry Andric     LR.clear();
7140b57cec5SDimitry Andric     LR.addLiveOuts(*MBB);
7150b57cec5SDimitry Andric     for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) {
7160b57cec5SDimitry Andric       MachineInstr &MI = *I;
7170b57cec5SDimitry Andric       if (!TII->isStridedAccess(MI))
7180b57cec5SDimitry Andric         continue;
7190b57cec5SDimitry Andric 
720bdd1243dSDimitry Andric       std::optional<LoadInfo> OptLdI = getLoadInfo(MI);
7210b57cec5SDimitry Andric       if (!OptLdI)
7220b57cec5SDimitry Andric         continue;
7230b57cec5SDimitry Andric       LoadInfo LdI = *OptLdI;
724bdd1243dSDimitry Andric       std::optional<unsigned> OptOldTag = getTag(TRI, MI, LdI);
7250b57cec5SDimitry Andric       if (!OptOldTag)
7260b57cec5SDimitry Andric         continue;
7270b57cec5SDimitry Andric       auto &OldCollisions = TagMap[*OptOldTag];
7280b57cec5SDimitry Andric       if (OldCollisions.size() <= 1)
7290b57cec5SDimitry Andric         continue;
7300b57cec5SDimitry Andric 
7310b57cec5SDimitry Andric       bool Fixed = false;
7320b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
7330b57cec5SDimitry Andric 
7340b57cec5SDimitry Andric       if (!DebugCounter::shouldExecute(FixCounter)) {
7350b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Skipping fix due to debug counter:\n  " << MI);
7360b57cec5SDimitry Andric         continue;
7370b57cec5SDimitry Andric       }
7380b57cec5SDimitry Andric 
7390b57cec5SDimitry Andric       // Add the non-base registers of MI as live so we don't use them as
7400b57cec5SDimitry Andric       // scratch registers.
7410b57cec5SDimitry Andric       for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) {
7420b57cec5SDimitry Andric         if (OpI == static_cast<unsigned>(LdI.BaseRegIdx))
7430b57cec5SDimitry Andric           continue;
7440b57cec5SDimitry Andric         MachineOperand &MO = MI.getOperand(OpI);
7450b57cec5SDimitry Andric         if (MO.isReg() && MO.readsReg())
7460b57cec5SDimitry Andric           LR.addReg(MO.getReg());
7470b57cec5SDimitry Andric       }
7480b57cec5SDimitry Andric 
7490b57cec5SDimitry Andric       for (unsigned ScratchReg : AArch64::GPR64RegClass) {
7500b57cec5SDimitry Andric         if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
7510b57cec5SDimitry Andric           continue;
7520b57cec5SDimitry Andric 
7530b57cec5SDimitry Andric         LoadInfo NewLdI(LdI);
7540b57cec5SDimitry Andric         NewLdI.BaseReg = ScratchReg;
7550b57cec5SDimitry Andric         unsigned NewTag = *getTag(TRI, MI, NewLdI);
7560b57cec5SDimitry Andric         // Scratch reg tag would collide too, so don't use it.
7570b57cec5SDimitry Andric         if (TagMap.count(NewTag))
7580b57cec5SDimitry Andric           continue;
7590b57cec5SDimitry Andric 
7600b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Changing base reg to: "
7610b57cec5SDimitry Andric                           << printReg(ScratchReg, TRI) << '\n');
7620b57cec5SDimitry Andric 
7630b57cec5SDimitry Andric         // Rewrite:
7640b57cec5SDimitry Andric         //   Xd = LOAD Xb, off
7650b57cec5SDimitry Andric         // to:
7660b57cec5SDimitry Andric         //   Xc = MOV Xb
7670b57cec5SDimitry Andric         //   Xd = LOAD Xc, off
7680b57cec5SDimitry Andric         DebugLoc DL = MI.getDebugLoc();
7690b57cec5SDimitry Andric         BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg)
7700b57cec5SDimitry Andric             .addReg(AArch64::XZR)
7710b57cec5SDimitry Andric             .addReg(LdI.BaseReg)
7720b57cec5SDimitry Andric             .addImm(0);
7730b57cec5SDimitry Andric         MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx);
7740b57cec5SDimitry Andric         BaseOpnd.setReg(ScratchReg);
7750b57cec5SDimitry Andric 
7760b57cec5SDimitry Andric         // If the load does a pre/post increment, then insert a MOV after as
7770b57cec5SDimitry Andric         // well to update the real base register.
7780b57cec5SDimitry Andric         if (LdI.IsPrePost) {
7790b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "Doing post MOV of incremented reg: "
7800b57cec5SDimitry Andric                             << printReg(ScratchReg, TRI) << '\n');
7810b57cec5SDimitry Andric           MI.getOperand(0).setReg(
7820b57cec5SDimitry Andric               ScratchReg); // Change tied operand pre/post update dest.
7830b57cec5SDimitry Andric           BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL,
7840b57cec5SDimitry Andric                   TII->get(AArch64::ORRXrs), LdI.BaseReg)
7850b57cec5SDimitry Andric               .addReg(AArch64::XZR)
7860b57cec5SDimitry Andric               .addReg(ScratchReg)
7870b57cec5SDimitry Andric               .addImm(0);
7880b57cec5SDimitry Andric         }
7890b57cec5SDimitry Andric 
7900b57cec5SDimitry Andric         for (int I = 0, E = OldCollisions.size(); I != E; ++I)
7910b57cec5SDimitry Andric           if (OldCollisions[I] == &MI) {
7920b57cec5SDimitry Andric             std::swap(OldCollisions[I], OldCollisions[E - 1]);
7930b57cec5SDimitry Andric             OldCollisions.pop_back();
7940b57cec5SDimitry Andric             break;
7950b57cec5SDimitry Andric           }
7960b57cec5SDimitry Andric 
7970b57cec5SDimitry Andric         // Update TagMap to reflect instruction changes to reduce the number
7980b57cec5SDimitry Andric         // of later MOVs to be inserted.  This needs to be done after
7990b57cec5SDimitry Andric         // OldCollisions is updated since it may be relocated by this
8000b57cec5SDimitry Andric         // insertion.
8010b57cec5SDimitry Andric         TagMap[NewTag].push_back(&MI);
8020b57cec5SDimitry Andric         ++NumCollisionsAvoided;
8030b57cec5SDimitry Andric         Fixed = true;
8040b57cec5SDimitry Andric         Modified = true;
8050b57cec5SDimitry Andric         break;
8060b57cec5SDimitry Andric       }
8070b57cec5SDimitry Andric       if (!Fixed)
8080b57cec5SDimitry Andric         ++NumCollisionsNotAvoided;
8090b57cec5SDimitry Andric     }
8100b57cec5SDimitry Andric   }
8110b57cec5SDimitry Andric }
8120b57cec5SDimitry Andric 
8130b57cec5SDimitry Andric bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
81481ad6265SDimitry Andric   auto &ST = Fn.getSubtarget<AArch64Subtarget>();
8150b57cec5SDimitry Andric   if (ST.getProcFamily() != AArch64Subtarget::Falkor)
8160b57cec5SDimitry Andric     return false;
8170b57cec5SDimitry Andric 
8180b57cec5SDimitry Andric   if (skipFunction(Fn.getFunction()))
8190b57cec5SDimitry Andric     return false;
8200b57cec5SDimitry Andric 
8210b57cec5SDimitry Andric   TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
8220b57cec5SDimitry Andric   TRI = ST.getRegisterInfo();
8230b57cec5SDimitry Andric 
824*0fca6ea1SDimitry Andric   MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
8250b57cec5SDimitry Andric 
8260b57cec5SDimitry Andric   Modified = false;
8270b57cec5SDimitry Andric 
8280b57cec5SDimitry Andric   for (MachineLoop *I : LI)
8290eae32dcSDimitry Andric     for (MachineLoop *L : depth_first(I))
8300b57cec5SDimitry Andric       // Only process inner-loops
831e8d8bef9SDimitry Andric       if (L->isInnermost())
8320eae32dcSDimitry Andric         runOnLoop(*L, Fn);
8330b57cec5SDimitry Andric 
8340b57cec5SDimitry Andric   return Modified;
8350b57cec5SDimitry Andric }
8360b57cec5SDimitry Andric 
8370b57cec5SDimitry Andric FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); }
838