//===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
/// that may inhibit the HW prefetching.  This is done in two steps.  Before
/// ISel, we mark strided loads (i.e. those that will likely benefit from
/// prefetching) with metadata.  Then, after opcodes have been finalized, we
/// insert MOVs and re-write loads to prevent unintentional tag collisions.
// ===---------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/raw_ostream.h"
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-falkor-hwpf-fix"

STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
STATISTIC(NumCollisionsAvoided,
          "Number of HW prefetch tag collisions avoided");
STATISTIC(NumCollisionsNotAvoided,
          "Number of HW prefetch tag collisions not avoided due to lack of registers");
DEBUG_COUNTER(FixCounter, "falkor-hwpf",
              "Controls which tag collisions are avoided");

namespace {

/// IR-level worker: walks every loop known to LoopInfo and attaches
/// FALKOR_STRIDED_ACCESS_MD metadata to loads in innermost loops whose
/// address is an affine add-recurrence according to ScalarEvolution.
class FalkorMarkStridedAccesses {
public:
  FalkorMarkStridedAccesses(LoopInfo &LI, ScalarEvolution &SE)
      : LI(LI), SE(SE) {}

  /// Process all loops. \returns true if any load was marked.
  bool run();

private:
  /// Mark the strided loads of a single loop. \returns true if any load was
  /// marked; loops that are not innermost are left untouched.
  bool runOnLoop(Loop &L);

  LoopInfo &LI;
  ScalarEvolution &SE;
};

/// Legacy pass-manager wrapper around FalkorMarkStridedAccesses.
class FalkorMarkStridedAccessesLegacy : public FunctionPass {
public:
  static char ID; // Pass ID, replacement for typeid

  FalkorMarkStridedAccessesLegacy() : FunctionPass(ID) {
    initializeFalkorMarkStridedAccessesLegacyPass(
        *PassRegistry::getPassRegistry());
  }

  /// Requires the target configuration, LoopInfo and ScalarEvolution; the
  /// pass only adds metadata, so the analyses listed here are preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
  }

  bool runOnFunction(Function &F) override;
};

} // end anonymous namespace

char FalkorMarkStridedAccessesLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
                      "Falkor HW Prefetch Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(FalkorMarkStridedAccessesLegacy, DEBUG_TYPE,
                    "Falkor HW Prefetch Fix", false, false)

/// Factory for the IR-level strided-access marking pass.
FunctionPass *llvm::createFalkorMarkStridedAccessesPass() {
  return new FalkorMarkStridedAccessesLegacy();
}

/// Run the marking step on \p F. Does nothing (and reports no change) unless
/// the function's subtarget is Falkor and the function is not skipped.
bool FalkorMarkStridedAccessesLegacy::runOnFunction(Function &F) {
  TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const AArch64Subtarget *ST =
      TPC.getTM<AArch64TargetMachine>().getSubtargetImpl(F);
  if (ST->getProcFamily() != AArch64Subtarget::Falkor)
    return false;

  if (skipFunction(F))
    return false;

  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();

  FalkorMarkStridedAccesses LDP(LI, SE);
  return LDP.run();
}

bool FalkorMarkStridedAccesses::run() {
  bool MadeChange = false;

  // Visit each top-level loop together with all loops nested inside it.
  for (Loop *L : LI)
    for (Loop *LIt : depth_first(L))
      MadeChange |= runOnLoop(*LIt);

  return MadeChange;
}

bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
  // Only mark strided loads in the inner-most loop
  if (!L.isInnermost())
    return false;

  bool MadeChange = false;

  for (BasicBlock *BB : L.blocks()) {
    for (Instruction &I : *BB) {
      auto *LoadI = dyn_cast<LoadInst>(&I);
      if (!LoadI)
        continue;

      // A loop-invariant address cannot be strided.
      Value *PtrValue = LoadI->getPointerOperand();
      if (L.isLoopInvariant(PtrValue))
        continue;

      // Only affine add-recurrences are treated as strided accesses.
      const SCEV *LSCEV = SE.getSCEV(PtrValue);
      const auto *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
      if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
        continue;

      // The metadata node carries no operands; its presence is the marker.
      LoadI->setMetadata(FALKOR_STRIDED_ACCESS_MD,
                         MDNode::get(LoadI->getContext(), {}));
      ++NumStridedLoadsMarked;
      LLVM_DEBUG(dbgs() << "Load: " << I << " marked as strided\n");
      MadeChange = true;
    }
  }

  return MadeChange;
}

namespace {

/// Machine-level (late) phase of the Falkor HW prefetcher fix. It runs on
/// functions without virtual registers, needs MachineLoopInfo and preserves
/// the CFG.
class FalkorHWPFFix : public MachineFunctionPass {
public:
  static char ID;

  FalkorHWPFFix() : MachineFunctionPass(ID) {
    initializeFalkorHWPFFixPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  void runOnLoop(MachineLoop &L, MachineFunction &Fn);

  // Default-initialized so a freshly constructed pass never holds
  // indeterminate pointers or flags, matching the in-class initializers
  // already used by LoadInfo below.
  const AArch64InstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  // Instructions grouped by an unsigned key; presumably the prefetcher tag
  // produced by makeTag() -- confirm against runOnLoop().
  DenseMap<unsigned, SmallVector<MachineInstr *, 4>> TagMap;
  bool Modified = false;
};

/// Bits from load opcodes used to compute HW prefetcher instruction tags.
struct LoadInfo {
  LoadInfo() = default;

  Register DestReg;                           // Invalid register when unused.
  Register BaseReg;
  int BaseRegIdx = -1;                        // Operand index of BaseReg.
  const MachineOperand *OffsetOpnd = nullptr; // Null when there is no offset.
  bool IsPrePost = false;
};

} // end anonymous namespace

char FalkorHWPFFix::ID = 0;

INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
                      "Falkor HW Prefetch Fix Late Phase", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
                    "Falkor HW Prefetch Fix Late Phase", false, false)

/// Pack the tag fields into one integer: the low 4 bits of \p Dest occupy
/// bits 0-3, the low 4 bits of \p Base bits 4-7, and the low 6 bits of
/// \p Offset bits 8-13. Higher input bits are discarded.
static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
  return (Dest & 0xf) | ((Base & 0xf) << 4) | ((Offset & 0x3f) << 8);
}
2330b57cec5SDimitry Andric 234bdd1243dSDimitry Andric static std::optional<LoadInfo> getLoadInfo(const MachineInstr &MI) { 2350b57cec5SDimitry Andric int DestRegIdx; 2360b57cec5SDimitry Andric int BaseRegIdx; 2370b57cec5SDimitry Andric int OffsetIdx; 2380b57cec5SDimitry Andric bool IsPrePost; 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric switch (MI.getOpcode()) { 2410b57cec5SDimitry Andric default: 242bdd1243dSDimitry Andric return std::nullopt; 2430b57cec5SDimitry Andric 2440b57cec5SDimitry Andric case AArch64::LD1i64: 2450b57cec5SDimitry Andric case AArch64::LD2i64: 2460b57cec5SDimitry Andric DestRegIdx = 0; 2470b57cec5SDimitry Andric BaseRegIdx = 3; 2480b57cec5SDimitry Andric OffsetIdx = -1; 2490b57cec5SDimitry Andric IsPrePost = false; 2500b57cec5SDimitry Andric break; 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric case AArch64::LD1i8: 2530b57cec5SDimitry Andric case AArch64::LD1i16: 2540b57cec5SDimitry Andric case AArch64::LD1i32: 2550b57cec5SDimitry Andric case AArch64::LD2i8: 2560b57cec5SDimitry Andric case AArch64::LD2i16: 2570b57cec5SDimitry Andric case AArch64::LD2i32: 2580b57cec5SDimitry Andric case AArch64::LD3i8: 2590b57cec5SDimitry Andric case AArch64::LD3i16: 2600b57cec5SDimitry Andric case AArch64::LD3i32: 2610b57cec5SDimitry Andric case AArch64::LD3i64: 2620b57cec5SDimitry Andric case AArch64::LD4i8: 2630b57cec5SDimitry Andric case AArch64::LD4i16: 2640b57cec5SDimitry Andric case AArch64::LD4i32: 2650b57cec5SDimitry Andric case AArch64::LD4i64: 2660b57cec5SDimitry Andric DestRegIdx = -1; 2670b57cec5SDimitry Andric BaseRegIdx = 3; 2680b57cec5SDimitry Andric OffsetIdx = -1; 2690b57cec5SDimitry Andric IsPrePost = false; 2700b57cec5SDimitry Andric break; 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric case AArch64::LD1Onev1d: 2730b57cec5SDimitry Andric case AArch64::LD1Onev2s: 2740b57cec5SDimitry Andric case AArch64::LD1Onev4h: 2750b57cec5SDimitry Andric case AArch64::LD1Onev8b: 2760b57cec5SDimitry Andric case AArch64::LD1Onev2d: 
2770b57cec5SDimitry Andric case AArch64::LD1Onev4s: 2780b57cec5SDimitry Andric case AArch64::LD1Onev8h: 2790b57cec5SDimitry Andric case AArch64::LD1Onev16b: 2800b57cec5SDimitry Andric case AArch64::LD1Rv1d: 2810b57cec5SDimitry Andric case AArch64::LD1Rv2s: 2820b57cec5SDimitry Andric case AArch64::LD1Rv4h: 2830b57cec5SDimitry Andric case AArch64::LD1Rv8b: 2840b57cec5SDimitry Andric case AArch64::LD1Rv2d: 2850b57cec5SDimitry Andric case AArch64::LD1Rv4s: 2860b57cec5SDimitry Andric case AArch64::LD1Rv8h: 2870b57cec5SDimitry Andric case AArch64::LD1Rv16b: 2880b57cec5SDimitry Andric DestRegIdx = 0; 2890b57cec5SDimitry Andric BaseRegIdx = 1; 2900b57cec5SDimitry Andric OffsetIdx = -1; 2910b57cec5SDimitry Andric IsPrePost = false; 2920b57cec5SDimitry Andric break; 2930b57cec5SDimitry Andric 2940b57cec5SDimitry Andric case AArch64::LD1Twov1d: 2950b57cec5SDimitry Andric case AArch64::LD1Twov2s: 2960b57cec5SDimitry Andric case AArch64::LD1Twov4h: 2970b57cec5SDimitry Andric case AArch64::LD1Twov8b: 2980b57cec5SDimitry Andric case AArch64::LD1Twov2d: 2990b57cec5SDimitry Andric case AArch64::LD1Twov4s: 3000b57cec5SDimitry Andric case AArch64::LD1Twov8h: 3010b57cec5SDimitry Andric case AArch64::LD1Twov16b: 3020b57cec5SDimitry Andric case AArch64::LD1Threev1d: 3030b57cec5SDimitry Andric case AArch64::LD1Threev2s: 3040b57cec5SDimitry Andric case AArch64::LD1Threev4h: 3050b57cec5SDimitry Andric case AArch64::LD1Threev8b: 3060b57cec5SDimitry Andric case AArch64::LD1Threev2d: 3070b57cec5SDimitry Andric case AArch64::LD1Threev4s: 3080b57cec5SDimitry Andric case AArch64::LD1Threev8h: 3090b57cec5SDimitry Andric case AArch64::LD1Threev16b: 3100b57cec5SDimitry Andric case AArch64::LD1Fourv1d: 3110b57cec5SDimitry Andric case AArch64::LD1Fourv2s: 3120b57cec5SDimitry Andric case AArch64::LD1Fourv4h: 3130b57cec5SDimitry Andric case AArch64::LD1Fourv8b: 3140b57cec5SDimitry Andric case AArch64::LD1Fourv2d: 3150b57cec5SDimitry Andric case AArch64::LD1Fourv4s: 3160b57cec5SDimitry Andric case 
AArch64::LD1Fourv8h: 3170b57cec5SDimitry Andric case AArch64::LD1Fourv16b: 3180b57cec5SDimitry Andric case AArch64::LD2Twov2s: 3190b57cec5SDimitry Andric case AArch64::LD2Twov4s: 3200b57cec5SDimitry Andric case AArch64::LD2Twov8b: 3210b57cec5SDimitry Andric case AArch64::LD2Twov2d: 3220b57cec5SDimitry Andric case AArch64::LD2Twov4h: 3230b57cec5SDimitry Andric case AArch64::LD2Twov8h: 3240b57cec5SDimitry Andric case AArch64::LD2Twov16b: 3250b57cec5SDimitry Andric case AArch64::LD2Rv1d: 3260b57cec5SDimitry Andric case AArch64::LD2Rv2s: 3270b57cec5SDimitry Andric case AArch64::LD2Rv4s: 3280b57cec5SDimitry Andric case AArch64::LD2Rv8b: 3290b57cec5SDimitry Andric case AArch64::LD2Rv2d: 3300b57cec5SDimitry Andric case AArch64::LD2Rv4h: 3310b57cec5SDimitry Andric case AArch64::LD2Rv8h: 3320b57cec5SDimitry Andric case AArch64::LD2Rv16b: 3330b57cec5SDimitry Andric case AArch64::LD3Threev2s: 3340b57cec5SDimitry Andric case AArch64::LD3Threev4h: 3350b57cec5SDimitry Andric case AArch64::LD3Threev8b: 3360b57cec5SDimitry Andric case AArch64::LD3Threev2d: 3370b57cec5SDimitry Andric case AArch64::LD3Threev4s: 3380b57cec5SDimitry Andric case AArch64::LD3Threev8h: 3390b57cec5SDimitry Andric case AArch64::LD3Threev16b: 3400b57cec5SDimitry Andric case AArch64::LD3Rv1d: 3410b57cec5SDimitry Andric case AArch64::LD3Rv2s: 3420b57cec5SDimitry Andric case AArch64::LD3Rv4h: 3430b57cec5SDimitry Andric case AArch64::LD3Rv8b: 3440b57cec5SDimitry Andric case AArch64::LD3Rv2d: 3450b57cec5SDimitry Andric case AArch64::LD3Rv4s: 3460b57cec5SDimitry Andric case AArch64::LD3Rv8h: 3470b57cec5SDimitry Andric case AArch64::LD3Rv16b: 3480b57cec5SDimitry Andric case AArch64::LD4Fourv2s: 3490b57cec5SDimitry Andric case AArch64::LD4Fourv4h: 3500b57cec5SDimitry Andric case AArch64::LD4Fourv8b: 3510b57cec5SDimitry Andric case AArch64::LD4Fourv2d: 3520b57cec5SDimitry Andric case AArch64::LD4Fourv4s: 3530b57cec5SDimitry Andric case AArch64::LD4Fourv8h: 3540b57cec5SDimitry Andric case AArch64::LD4Fourv16b: 
3550b57cec5SDimitry Andric case AArch64::LD4Rv1d: 3560b57cec5SDimitry Andric case AArch64::LD4Rv2s: 3570b57cec5SDimitry Andric case AArch64::LD4Rv4h: 3580b57cec5SDimitry Andric case AArch64::LD4Rv8b: 3590b57cec5SDimitry Andric case AArch64::LD4Rv2d: 3600b57cec5SDimitry Andric case AArch64::LD4Rv4s: 3610b57cec5SDimitry Andric case AArch64::LD4Rv8h: 3620b57cec5SDimitry Andric case AArch64::LD4Rv16b: 3630b57cec5SDimitry Andric DestRegIdx = -1; 3640b57cec5SDimitry Andric BaseRegIdx = 1; 3650b57cec5SDimitry Andric OffsetIdx = -1; 3660b57cec5SDimitry Andric IsPrePost = false; 3670b57cec5SDimitry Andric break; 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric case AArch64::LD1i64_POST: 3700b57cec5SDimitry Andric case AArch64::LD2i64_POST: 3710b57cec5SDimitry Andric DestRegIdx = 1; 3720b57cec5SDimitry Andric BaseRegIdx = 4; 3730b57cec5SDimitry Andric OffsetIdx = 5; 3740b57cec5SDimitry Andric IsPrePost = true; 3750b57cec5SDimitry Andric break; 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric case AArch64::LD1i8_POST: 3780b57cec5SDimitry Andric case AArch64::LD1i16_POST: 3790b57cec5SDimitry Andric case AArch64::LD1i32_POST: 3800b57cec5SDimitry Andric case AArch64::LD2i8_POST: 3810b57cec5SDimitry Andric case AArch64::LD2i16_POST: 3820b57cec5SDimitry Andric case AArch64::LD2i32_POST: 3830b57cec5SDimitry Andric case AArch64::LD3i8_POST: 3840b57cec5SDimitry Andric case AArch64::LD3i16_POST: 3850b57cec5SDimitry Andric case AArch64::LD3i32_POST: 3860b57cec5SDimitry Andric case AArch64::LD3i64_POST: 3870b57cec5SDimitry Andric case AArch64::LD4i8_POST: 3880b57cec5SDimitry Andric case AArch64::LD4i16_POST: 3890b57cec5SDimitry Andric case AArch64::LD4i32_POST: 3900b57cec5SDimitry Andric case AArch64::LD4i64_POST: 3910b57cec5SDimitry Andric DestRegIdx = -1; 3920b57cec5SDimitry Andric BaseRegIdx = 4; 3930b57cec5SDimitry Andric OffsetIdx = 5; 3940b57cec5SDimitry Andric IsPrePost = true; 3950b57cec5SDimitry Andric break; 3960b57cec5SDimitry Andric 3970b57cec5SDimitry Andric case 
AArch64::LD1Onev1d_POST: 3980b57cec5SDimitry Andric case AArch64::LD1Onev2s_POST: 3990b57cec5SDimitry Andric case AArch64::LD1Onev4h_POST: 4000b57cec5SDimitry Andric case AArch64::LD1Onev8b_POST: 4010b57cec5SDimitry Andric case AArch64::LD1Onev2d_POST: 4020b57cec5SDimitry Andric case AArch64::LD1Onev4s_POST: 4030b57cec5SDimitry Andric case AArch64::LD1Onev8h_POST: 4040b57cec5SDimitry Andric case AArch64::LD1Onev16b_POST: 4050b57cec5SDimitry Andric case AArch64::LD1Rv1d_POST: 4060b57cec5SDimitry Andric case AArch64::LD1Rv2s_POST: 4070b57cec5SDimitry Andric case AArch64::LD1Rv4h_POST: 4080b57cec5SDimitry Andric case AArch64::LD1Rv8b_POST: 4090b57cec5SDimitry Andric case AArch64::LD1Rv2d_POST: 4100b57cec5SDimitry Andric case AArch64::LD1Rv4s_POST: 4110b57cec5SDimitry Andric case AArch64::LD1Rv8h_POST: 4120b57cec5SDimitry Andric case AArch64::LD1Rv16b_POST: 4130b57cec5SDimitry Andric DestRegIdx = 1; 4140b57cec5SDimitry Andric BaseRegIdx = 2; 4150b57cec5SDimitry Andric OffsetIdx = 3; 4160b57cec5SDimitry Andric IsPrePost = true; 4170b57cec5SDimitry Andric break; 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric case AArch64::LD1Twov1d_POST: 4200b57cec5SDimitry Andric case AArch64::LD1Twov2s_POST: 4210b57cec5SDimitry Andric case AArch64::LD1Twov4h_POST: 4220b57cec5SDimitry Andric case AArch64::LD1Twov8b_POST: 4230b57cec5SDimitry Andric case AArch64::LD1Twov2d_POST: 4240b57cec5SDimitry Andric case AArch64::LD1Twov4s_POST: 4250b57cec5SDimitry Andric case AArch64::LD1Twov8h_POST: 4260b57cec5SDimitry Andric case AArch64::LD1Twov16b_POST: 4270b57cec5SDimitry Andric case AArch64::LD1Threev1d_POST: 4280b57cec5SDimitry Andric case AArch64::LD1Threev2s_POST: 4290b57cec5SDimitry Andric case AArch64::LD1Threev4h_POST: 4300b57cec5SDimitry Andric case AArch64::LD1Threev8b_POST: 4310b57cec5SDimitry Andric case AArch64::LD1Threev2d_POST: 4320b57cec5SDimitry Andric case AArch64::LD1Threev4s_POST: 4330b57cec5SDimitry Andric case AArch64::LD1Threev8h_POST: 4340b57cec5SDimitry Andric 
case AArch64::LD1Threev16b_POST: 4350b57cec5SDimitry Andric case AArch64::LD1Fourv1d_POST: 4360b57cec5SDimitry Andric case AArch64::LD1Fourv2s_POST: 4370b57cec5SDimitry Andric case AArch64::LD1Fourv4h_POST: 4380b57cec5SDimitry Andric case AArch64::LD1Fourv8b_POST: 4390b57cec5SDimitry Andric case AArch64::LD1Fourv2d_POST: 4400b57cec5SDimitry Andric case AArch64::LD1Fourv4s_POST: 4410b57cec5SDimitry Andric case AArch64::LD1Fourv8h_POST: 4420b57cec5SDimitry Andric case AArch64::LD1Fourv16b_POST: 4430b57cec5SDimitry Andric case AArch64::LD2Twov2s_POST: 4440b57cec5SDimitry Andric case AArch64::LD2Twov4s_POST: 4450b57cec5SDimitry Andric case AArch64::LD2Twov8b_POST: 4460b57cec5SDimitry Andric case AArch64::LD2Twov2d_POST: 4470b57cec5SDimitry Andric case AArch64::LD2Twov4h_POST: 4480b57cec5SDimitry Andric case AArch64::LD2Twov8h_POST: 4490b57cec5SDimitry Andric case AArch64::LD2Twov16b_POST: 4500b57cec5SDimitry Andric case AArch64::LD2Rv1d_POST: 4510b57cec5SDimitry Andric case AArch64::LD2Rv2s_POST: 4520b57cec5SDimitry Andric case AArch64::LD2Rv4s_POST: 4530b57cec5SDimitry Andric case AArch64::LD2Rv8b_POST: 4540b57cec5SDimitry Andric case AArch64::LD2Rv2d_POST: 4550b57cec5SDimitry Andric case AArch64::LD2Rv4h_POST: 4560b57cec5SDimitry Andric case AArch64::LD2Rv8h_POST: 4570b57cec5SDimitry Andric case AArch64::LD2Rv16b_POST: 4580b57cec5SDimitry Andric case AArch64::LD3Threev2s_POST: 4590b57cec5SDimitry Andric case AArch64::LD3Threev4h_POST: 4600b57cec5SDimitry Andric case AArch64::LD3Threev8b_POST: 4610b57cec5SDimitry Andric case AArch64::LD3Threev2d_POST: 4620b57cec5SDimitry Andric case AArch64::LD3Threev4s_POST: 4630b57cec5SDimitry Andric case AArch64::LD3Threev8h_POST: 4640b57cec5SDimitry Andric case AArch64::LD3Threev16b_POST: 4650b57cec5SDimitry Andric case AArch64::LD3Rv1d_POST: 4660b57cec5SDimitry Andric case AArch64::LD3Rv2s_POST: 4670b57cec5SDimitry Andric case AArch64::LD3Rv4h_POST: 4680b57cec5SDimitry Andric case AArch64::LD3Rv8b_POST: 4690b57cec5SDimitry Andric 
case AArch64::LD3Rv2d_POST: 4700b57cec5SDimitry Andric case AArch64::LD3Rv4s_POST: 4710b57cec5SDimitry Andric case AArch64::LD3Rv8h_POST: 4720b57cec5SDimitry Andric case AArch64::LD3Rv16b_POST: 4730b57cec5SDimitry Andric case AArch64::LD4Fourv2s_POST: 4740b57cec5SDimitry Andric case AArch64::LD4Fourv4h_POST: 4750b57cec5SDimitry Andric case AArch64::LD4Fourv8b_POST: 4760b57cec5SDimitry Andric case AArch64::LD4Fourv2d_POST: 4770b57cec5SDimitry Andric case AArch64::LD4Fourv4s_POST: 4780b57cec5SDimitry Andric case AArch64::LD4Fourv8h_POST: 4790b57cec5SDimitry Andric case AArch64::LD4Fourv16b_POST: 4800b57cec5SDimitry Andric case AArch64::LD4Rv1d_POST: 4810b57cec5SDimitry Andric case AArch64::LD4Rv2s_POST: 4820b57cec5SDimitry Andric case AArch64::LD4Rv4h_POST: 4830b57cec5SDimitry Andric case AArch64::LD4Rv8b_POST: 4840b57cec5SDimitry Andric case AArch64::LD4Rv2d_POST: 4850b57cec5SDimitry Andric case AArch64::LD4Rv4s_POST: 4860b57cec5SDimitry Andric case AArch64::LD4Rv8h_POST: 4870b57cec5SDimitry Andric case AArch64::LD4Rv16b_POST: 4880b57cec5SDimitry Andric DestRegIdx = -1; 4890b57cec5SDimitry Andric BaseRegIdx = 2; 4900b57cec5SDimitry Andric OffsetIdx = 3; 4910b57cec5SDimitry Andric IsPrePost = true; 4920b57cec5SDimitry Andric break; 4930b57cec5SDimitry Andric 4940b57cec5SDimitry Andric case AArch64::LDRBBroW: 4950b57cec5SDimitry Andric case AArch64::LDRBBroX: 4960b57cec5SDimitry Andric case AArch64::LDRBBui: 4970b57cec5SDimitry Andric case AArch64::LDRBroW: 4980b57cec5SDimitry Andric case AArch64::LDRBroX: 4990b57cec5SDimitry Andric case AArch64::LDRBui: 5000b57cec5SDimitry Andric case AArch64::LDRDl: 5010b57cec5SDimitry Andric case AArch64::LDRDroW: 5020b57cec5SDimitry Andric case AArch64::LDRDroX: 5030b57cec5SDimitry Andric case AArch64::LDRDui: 5040b57cec5SDimitry Andric case AArch64::LDRHHroW: 5050b57cec5SDimitry Andric case AArch64::LDRHHroX: 5060b57cec5SDimitry Andric case AArch64::LDRHHui: 5070b57cec5SDimitry Andric case AArch64::LDRHroW: 5080b57cec5SDimitry 
Andric case AArch64::LDRHroX: 5090b57cec5SDimitry Andric case AArch64::LDRHui: 5100b57cec5SDimitry Andric case AArch64::LDRQl: 5110b57cec5SDimitry Andric case AArch64::LDRQroW: 5120b57cec5SDimitry Andric case AArch64::LDRQroX: 5130b57cec5SDimitry Andric case AArch64::LDRQui: 5140b57cec5SDimitry Andric case AArch64::LDRSBWroW: 5150b57cec5SDimitry Andric case AArch64::LDRSBWroX: 5160b57cec5SDimitry Andric case AArch64::LDRSBWui: 5170b57cec5SDimitry Andric case AArch64::LDRSBXroW: 5180b57cec5SDimitry Andric case AArch64::LDRSBXroX: 5190b57cec5SDimitry Andric case AArch64::LDRSBXui: 5200b57cec5SDimitry Andric case AArch64::LDRSHWroW: 5210b57cec5SDimitry Andric case AArch64::LDRSHWroX: 5220b57cec5SDimitry Andric case AArch64::LDRSHWui: 5230b57cec5SDimitry Andric case AArch64::LDRSHXroW: 5240b57cec5SDimitry Andric case AArch64::LDRSHXroX: 5250b57cec5SDimitry Andric case AArch64::LDRSHXui: 5260b57cec5SDimitry Andric case AArch64::LDRSWl: 5270b57cec5SDimitry Andric case AArch64::LDRSWroW: 5280b57cec5SDimitry Andric case AArch64::LDRSWroX: 5290b57cec5SDimitry Andric case AArch64::LDRSWui: 5300b57cec5SDimitry Andric case AArch64::LDRSl: 5310b57cec5SDimitry Andric case AArch64::LDRSroW: 5320b57cec5SDimitry Andric case AArch64::LDRSroX: 5330b57cec5SDimitry Andric case AArch64::LDRSui: 5340b57cec5SDimitry Andric case AArch64::LDRWl: 5350b57cec5SDimitry Andric case AArch64::LDRWroW: 5360b57cec5SDimitry Andric case AArch64::LDRWroX: 5370b57cec5SDimitry Andric case AArch64::LDRWui: 5380b57cec5SDimitry Andric case AArch64::LDRXl: 5390b57cec5SDimitry Andric case AArch64::LDRXroW: 5400b57cec5SDimitry Andric case AArch64::LDRXroX: 5410b57cec5SDimitry Andric case AArch64::LDRXui: 5420b57cec5SDimitry Andric case AArch64::LDURBBi: 5430b57cec5SDimitry Andric case AArch64::LDURBi: 5440b57cec5SDimitry Andric case AArch64::LDURDi: 5450b57cec5SDimitry Andric case AArch64::LDURHHi: 5460b57cec5SDimitry Andric case AArch64::LDURHi: 5470b57cec5SDimitry Andric case AArch64::LDURQi: 
5480b57cec5SDimitry Andric case AArch64::LDURSBWi: 5490b57cec5SDimitry Andric case AArch64::LDURSBXi: 5500b57cec5SDimitry Andric case AArch64::LDURSHWi: 5510b57cec5SDimitry Andric case AArch64::LDURSHXi: 5520b57cec5SDimitry Andric case AArch64::LDURSWi: 5530b57cec5SDimitry Andric case AArch64::LDURSi: 5540b57cec5SDimitry Andric case AArch64::LDURWi: 5550b57cec5SDimitry Andric case AArch64::LDURXi: 5560b57cec5SDimitry Andric DestRegIdx = 0; 5570b57cec5SDimitry Andric BaseRegIdx = 1; 5580b57cec5SDimitry Andric OffsetIdx = 2; 5590b57cec5SDimitry Andric IsPrePost = false; 5600b57cec5SDimitry Andric break; 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric case AArch64::LDRBBpost: 5630b57cec5SDimitry Andric case AArch64::LDRBBpre: 5640b57cec5SDimitry Andric case AArch64::LDRBpost: 5650b57cec5SDimitry Andric case AArch64::LDRBpre: 5660b57cec5SDimitry Andric case AArch64::LDRDpost: 5670b57cec5SDimitry Andric case AArch64::LDRDpre: 5680b57cec5SDimitry Andric case AArch64::LDRHHpost: 5690b57cec5SDimitry Andric case AArch64::LDRHHpre: 5700b57cec5SDimitry Andric case AArch64::LDRHpost: 5710b57cec5SDimitry Andric case AArch64::LDRHpre: 5720b57cec5SDimitry Andric case AArch64::LDRQpost: 5730b57cec5SDimitry Andric case AArch64::LDRQpre: 5740b57cec5SDimitry Andric case AArch64::LDRSBWpost: 5750b57cec5SDimitry Andric case AArch64::LDRSBWpre: 5760b57cec5SDimitry Andric case AArch64::LDRSBXpost: 5770b57cec5SDimitry Andric case AArch64::LDRSBXpre: 5780b57cec5SDimitry Andric case AArch64::LDRSHWpost: 5790b57cec5SDimitry Andric case AArch64::LDRSHWpre: 5800b57cec5SDimitry Andric case AArch64::LDRSHXpost: 5810b57cec5SDimitry Andric case AArch64::LDRSHXpre: 5820b57cec5SDimitry Andric case AArch64::LDRSWpost: 5830b57cec5SDimitry Andric case AArch64::LDRSWpre: 5840b57cec5SDimitry Andric case AArch64::LDRSpost: 5850b57cec5SDimitry Andric case AArch64::LDRSpre: 5860b57cec5SDimitry Andric case AArch64::LDRWpost: 5870b57cec5SDimitry Andric case AArch64::LDRWpre: 5880b57cec5SDimitry Andric 
case AArch64::LDRXpost: 5890b57cec5SDimitry Andric case AArch64::LDRXpre: 5900b57cec5SDimitry Andric DestRegIdx = 1; 5910b57cec5SDimitry Andric BaseRegIdx = 2; 5920b57cec5SDimitry Andric OffsetIdx = 3; 5930b57cec5SDimitry Andric IsPrePost = true; 5940b57cec5SDimitry Andric break; 5950b57cec5SDimitry Andric 5960b57cec5SDimitry Andric case AArch64::LDNPDi: 5970b57cec5SDimitry Andric case AArch64::LDNPQi: 5980b57cec5SDimitry Andric case AArch64::LDNPSi: 5990b57cec5SDimitry Andric case AArch64::LDPQi: 6000b57cec5SDimitry Andric case AArch64::LDPDi: 6010b57cec5SDimitry Andric case AArch64::LDPSi: 6020b57cec5SDimitry Andric DestRegIdx = -1; 6030b57cec5SDimitry Andric BaseRegIdx = 2; 6040b57cec5SDimitry Andric OffsetIdx = 3; 6050b57cec5SDimitry Andric IsPrePost = false; 6060b57cec5SDimitry Andric break; 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric case AArch64::LDPSWi: 6090b57cec5SDimitry Andric case AArch64::LDPWi: 6100b57cec5SDimitry Andric case AArch64::LDPXi: 6110b57cec5SDimitry Andric DestRegIdx = 0; 6120b57cec5SDimitry Andric BaseRegIdx = 2; 6130b57cec5SDimitry Andric OffsetIdx = 3; 6140b57cec5SDimitry Andric IsPrePost = false; 6150b57cec5SDimitry Andric break; 6160b57cec5SDimitry Andric 6170b57cec5SDimitry Andric case AArch64::LDPQpost: 6180b57cec5SDimitry Andric case AArch64::LDPQpre: 6190b57cec5SDimitry Andric case AArch64::LDPDpost: 6200b57cec5SDimitry Andric case AArch64::LDPDpre: 6210b57cec5SDimitry Andric case AArch64::LDPSpost: 6220b57cec5SDimitry Andric case AArch64::LDPSpre: 6230b57cec5SDimitry Andric DestRegIdx = -1; 6240b57cec5SDimitry Andric BaseRegIdx = 3; 6250b57cec5SDimitry Andric OffsetIdx = 4; 6260b57cec5SDimitry Andric IsPrePost = true; 6270b57cec5SDimitry Andric break; 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric case AArch64::LDPSWpost: 6300b57cec5SDimitry Andric case AArch64::LDPSWpre: 6310b57cec5SDimitry Andric case AArch64::LDPWpost: 6320b57cec5SDimitry Andric case AArch64::LDPWpre: 6330b57cec5SDimitry Andric case 
AArch64::LDPXpost: 6340b57cec5SDimitry Andric case AArch64::LDPXpre: 6350b57cec5SDimitry Andric DestRegIdx = 1; 6360b57cec5SDimitry Andric BaseRegIdx = 3; 6370b57cec5SDimitry Andric OffsetIdx = 4; 6380b57cec5SDimitry Andric IsPrePost = true; 6390b57cec5SDimitry Andric break; 6400b57cec5SDimitry Andric } 6410b57cec5SDimitry Andric 6420b57cec5SDimitry Andric // Loads from the stack pointer don't get prefetched. 6438bcb0991SDimitry Andric Register BaseReg = MI.getOperand(BaseRegIdx).getReg(); 6440b57cec5SDimitry Andric if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP) 645bdd1243dSDimitry Andric return std::nullopt; 6460b57cec5SDimitry Andric 6470b57cec5SDimitry Andric LoadInfo LI; 6480b57cec5SDimitry Andric LI.DestReg = DestRegIdx == -1 ? Register() : MI.getOperand(DestRegIdx).getReg(); 6490b57cec5SDimitry Andric LI.BaseReg = BaseReg; 6500b57cec5SDimitry Andric LI.BaseRegIdx = BaseRegIdx; 6510b57cec5SDimitry Andric LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx); 6520b57cec5SDimitry Andric LI.IsPrePost = IsPrePost; 6530b57cec5SDimitry Andric return LI; 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric 656bdd1243dSDimitry Andric static std::optional<unsigned> getTag(const TargetRegisterInfo *TRI, 657bdd1243dSDimitry Andric const MachineInstr &MI, 658bdd1243dSDimitry Andric const LoadInfo &LI) { 6590b57cec5SDimitry Andric unsigned Dest = LI.DestReg ? 
TRI->getEncodingValue(LI.DestReg) : 0; 6600b57cec5SDimitry Andric unsigned Base = TRI->getEncodingValue(LI.BaseReg); 6610b57cec5SDimitry Andric unsigned Off; 6620b57cec5SDimitry Andric if (LI.OffsetOpnd == nullptr) 6630b57cec5SDimitry Andric Off = 0; 6640b57cec5SDimitry Andric else if (LI.OffsetOpnd->isGlobal() || LI.OffsetOpnd->isSymbol() || 6650b57cec5SDimitry Andric LI.OffsetOpnd->isCPI()) 666bdd1243dSDimitry Andric return std::nullopt; 6670b57cec5SDimitry Andric else if (LI.OffsetOpnd->isReg()) 6680b57cec5SDimitry Andric Off = (1 << 5) | TRI->getEncodingValue(LI.OffsetOpnd->getReg()); 6690b57cec5SDimitry Andric else 6700b57cec5SDimitry Andric Off = LI.OffsetOpnd->getImm() >> 2; 6710b57cec5SDimitry Andric 6720b57cec5SDimitry Andric return makeTag(Dest, Base, Off); 6730b57cec5SDimitry Andric } 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) { 6760b57cec5SDimitry Andric // Build the initial tag map for the whole loop. 
6770b57cec5SDimitry Andric TagMap.clear(); 6780b57cec5SDimitry Andric for (MachineBasicBlock *MBB : L.getBlocks()) 6790b57cec5SDimitry Andric for (MachineInstr &MI : *MBB) { 680bdd1243dSDimitry Andric std::optional<LoadInfo> LInfo = getLoadInfo(MI); 6810b57cec5SDimitry Andric if (!LInfo) 6820b57cec5SDimitry Andric continue; 683bdd1243dSDimitry Andric std::optional<unsigned> Tag = getTag(TRI, MI, *LInfo); 6840b57cec5SDimitry Andric if (!Tag) 6850b57cec5SDimitry Andric continue; 6860b57cec5SDimitry Andric TagMap[*Tag].push_back(&MI); 6870b57cec5SDimitry Andric } 6880b57cec5SDimitry Andric 6890b57cec5SDimitry Andric bool AnyCollisions = false; 6900b57cec5SDimitry Andric for (auto &P : TagMap) { 6910b57cec5SDimitry Andric auto Size = P.second.size(); 6920b57cec5SDimitry Andric if (Size > 1) { 6930b57cec5SDimitry Andric for (auto *MI : P.second) { 6940b57cec5SDimitry Andric if (TII->isStridedAccess(*MI)) { 6950b57cec5SDimitry Andric AnyCollisions = true; 6960b57cec5SDimitry Andric break; 6970b57cec5SDimitry Andric } 6980b57cec5SDimitry Andric } 6990b57cec5SDimitry Andric } 7000b57cec5SDimitry Andric if (AnyCollisions) 7010b57cec5SDimitry Andric break; 7020b57cec5SDimitry Andric } 7030b57cec5SDimitry Andric // Nothing to fix. 7040b57cec5SDimitry Andric if (!AnyCollisions) 7050b57cec5SDimitry Andric return; 7060b57cec5SDimitry Andric 7070b57cec5SDimitry Andric MachineRegisterInfo &MRI = Fn.getRegInfo(); 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric // Go through all the basic blocks in the current loop and fix any streaming 7100b57cec5SDimitry Andric // loads to avoid collisions with any other loads. 
7110b57cec5SDimitry Andric LiveRegUnits LR(*TRI); 7120b57cec5SDimitry Andric for (MachineBasicBlock *MBB : L.getBlocks()) { 7130b57cec5SDimitry Andric LR.clear(); 7140b57cec5SDimitry Andric LR.addLiveOuts(*MBB); 7150b57cec5SDimitry Andric for (auto I = MBB->rbegin(); I != MBB->rend(); LR.stepBackward(*I), ++I) { 7160b57cec5SDimitry Andric MachineInstr &MI = *I; 7170b57cec5SDimitry Andric if (!TII->isStridedAccess(MI)) 7180b57cec5SDimitry Andric continue; 7190b57cec5SDimitry Andric 720bdd1243dSDimitry Andric std::optional<LoadInfo> OptLdI = getLoadInfo(MI); 7210b57cec5SDimitry Andric if (!OptLdI) 7220b57cec5SDimitry Andric continue; 7230b57cec5SDimitry Andric LoadInfo LdI = *OptLdI; 724bdd1243dSDimitry Andric std::optional<unsigned> OptOldTag = getTag(TRI, MI, LdI); 7250b57cec5SDimitry Andric if (!OptOldTag) 7260b57cec5SDimitry Andric continue; 7270b57cec5SDimitry Andric auto &OldCollisions = TagMap[*OptOldTag]; 7280b57cec5SDimitry Andric if (OldCollisions.size() <= 1) 7290b57cec5SDimitry Andric continue; 7300b57cec5SDimitry Andric 7310b57cec5SDimitry Andric bool Fixed = false; 7320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Attempting to fix tag collision: " << MI); 7330b57cec5SDimitry Andric 7340b57cec5SDimitry Andric if (!DebugCounter::shouldExecute(FixCounter)) { 7350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI); 7360b57cec5SDimitry Andric continue; 7370b57cec5SDimitry Andric } 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric // Add the non-base registers of MI as live so we don't use them as 7400b57cec5SDimitry Andric // scratch registers. 
7410b57cec5SDimitry Andric for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) { 7420b57cec5SDimitry Andric if (OpI == static_cast<unsigned>(LdI.BaseRegIdx)) 7430b57cec5SDimitry Andric continue; 7440b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(OpI); 7450b57cec5SDimitry Andric if (MO.isReg() && MO.readsReg()) 7460b57cec5SDimitry Andric LR.addReg(MO.getReg()); 7470b57cec5SDimitry Andric } 7480b57cec5SDimitry Andric 7490b57cec5SDimitry Andric for (unsigned ScratchReg : AArch64::GPR64RegClass) { 7500b57cec5SDimitry Andric if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg)) 7510b57cec5SDimitry Andric continue; 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric LoadInfo NewLdI(LdI); 7540b57cec5SDimitry Andric NewLdI.BaseReg = ScratchReg; 7550b57cec5SDimitry Andric unsigned NewTag = *getTag(TRI, MI, NewLdI); 7560b57cec5SDimitry Andric // Scratch reg tag would collide too, so don't use it. 7570b57cec5SDimitry Andric if (TagMap.count(NewTag)) 7580b57cec5SDimitry Andric continue; 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Changing base reg to: " 7610b57cec5SDimitry Andric << printReg(ScratchReg, TRI) << '\n'); 7620b57cec5SDimitry Andric 7630b57cec5SDimitry Andric // Rewrite: 7640b57cec5SDimitry Andric // Xd = LOAD Xb, off 7650b57cec5SDimitry Andric // to: 7660b57cec5SDimitry Andric // Xc = MOV Xb 7670b57cec5SDimitry Andric // Xd = LOAD Xc, off 7680b57cec5SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 7690b57cec5SDimitry Andric BuildMI(*MBB, &MI, DL, TII->get(AArch64::ORRXrs), ScratchReg) 7700b57cec5SDimitry Andric .addReg(AArch64::XZR) 7710b57cec5SDimitry Andric .addReg(LdI.BaseReg) 7720b57cec5SDimitry Andric .addImm(0); 7730b57cec5SDimitry Andric MachineOperand &BaseOpnd = MI.getOperand(LdI.BaseRegIdx); 7740b57cec5SDimitry Andric BaseOpnd.setReg(ScratchReg); 7750b57cec5SDimitry Andric 7760b57cec5SDimitry Andric // If the load does a pre/post increment, then insert a MOV after as 7770b57cec5SDimitry 
Andric // well to update the real base register. 7780b57cec5SDimitry Andric if (LdI.IsPrePost) { 7790b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Doing post MOV of incremented reg: " 7800b57cec5SDimitry Andric << printReg(ScratchReg, TRI) << '\n'); 7810b57cec5SDimitry Andric MI.getOperand(0).setReg( 7820b57cec5SDimitry Andric ScratchReg); // Change tied operand pre/post update dest. 7830b57cec5SDimitry Andric BuildMI(*MBB, std::next(MachineBasicBlock::iterator(MI)), DL, 7840b57cec5SDimitry Andric TII->get(AArch64::ORRXrs), LdI.BaseReg) 7850b57cec5SDimitry Andric .addReg(AArch64::XZR) 7860b57cec5SDimitry Andric .addReg(ScratchReg) 7870b57cec5SDimitry Andric .addImm(0); 7880b57cec5SDimitry Andric } 7890b57cec5SDimitry Andric 7900b57cec5SDimitry Andric for (int I = 0, E = OldCollisions.size(); I != E; ++I) 7910b57cec5SDimitry Andric if (OldCollisions[I] == &MI) { 7920b57cec5SDimitry Andric std::swap(OldCollisions[I], OldCollisions[E - 1]); 7930b57cec5SDimitry Andric OldCollisions.pop_back(); 7940b57cec5SDimitry Andric break; 7950b57cec5SDimitry Andric } 7960b57cec5SDimitry Andric 7970b57cec5SDimitry Andric // Update TagMap to reflect instruction changes to reduce the number 7980b57cec5SDimitry Andric // of later MOVs to be inserted. This needs to be done after 7990b57cec5SDimitry Andric // OldCollisions is updated since it may be relocated by this 8000b57cec5SDimitry Andric // insertion. 
8010b57cec5SDimitry Andric TagMap[NewTag].push_back(&MI); 8020b57cec5SDimitry Andric ++NumCollisionsAvoided; 8030b57cec5SDimitry Andric Fixed = true; 8040b57cec5SDimitry Andric Modified = true; 8050b57cec5SDimitry Andric break; 8060b57cec5SDimitry Andric } 8070b57cec5SDimitry Andric if (!Fixed) 8080b57cec5SDimitry Andric ++NumCollisionsNotAvoided; 8090b57cec5SDimitry Andric } 8100b57cec5SDimitry Andric } 8110b57cec5SDimitry Andric } 8120b57cec5SDimitry Andric 8130b57cec5SDimitry Andric bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { 81481ad6265SDimitry Andric auto &ST = Fn.getSubtarget<AArch64Subtarget>(); 8150b57cec5SDimitry Andric if (ST.getProcFamily() != AArch64Subtarget::Falkor) 8160b57cec5SDimitry Andric return false; 8170b57cec5SDimitry Andric 8180b57cec5SDimitry Andric if (skipFunction(Fn.getFunction())) 8190b57cec5SDimitry Andric return false; 8200b57cec5SDimitry Andric 8210b57cec5SDimitry Andric TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo()); 8220b57cec5SDimitry Andric TRI = ST.getRegisterInfo(); 8230b57cec5SDimitry Andric 824*0fca6ea1SDimitry Andric MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 8250b57cec5SDimitry Andric 8260b57cec5SDimitry Andric Modified = false; 8270b57cec5SDimitry Andric 8280b57cec5SDimitry Andric for (MachineLoop *I : LI) 8290eae32dcSDimitry Andric for (MachineLoop *L : depth_first(I)) 8300b57cec5SDimitry Andric // Only process inner-loops 831e8d8bef9SDimitry Andric if (L->isInnermost()) 8320eae32dcSDimitry Andric runOnLoop(*L, Fn); 8330b57cec5SDimitry Andric 8340b57cec5SDimitry Andric return Modified; 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric FunctionPass *llvm::createFalkorHWPFFixPass() { return new FalkorHWPFFix(); } 838