//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the LoadStoreOpt optimization pass.
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>

#define DEBUG_TYPE "loadstore-opt"

using namespace llvm;
using namespace ore;
using namespace MIPatternMatch;

STATISTIC(NumStoresMerged, "Number of stores merged");

// Upper bound (in bits) on the width of a merged store we will try to create.
const unsigned MaxStoreSizeToForm = 128;

char LoadStoreOpt::ID = 0;
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                      false, false)
INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                    false, false)

LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
    : MachineFunctionPass(ID), DoNotRunPass(F) {}

LoadStoreOpt::LoadStoreOpt()
    : LoadStoreOpt([](const MachineFunction &) { return false; }) {}

/// Cache per-function state (MRI, AA, target info) and reset any leftover
/// bookkeeping before running on \p MF.
void LoadStoreOpt::init(MachineFunction &MF) {
  this->MF = &MF;
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TLI = MF.getSubtarget().getTargetLowering();
  LI = MF.getSubtarget().getLegalizerInfo();
  Builder.setMF(MF);
  // The pass behaves differently before legalization: see
  // isLegalOrBeforeLegalizer().
  IsPreLegalizer = !MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::Legalized);
  InstsToErase.clear();
}

void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AAResultsWrapperPass>();
  AU.setPreservesAll();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

/// Decompose the pointer \p Ptr into a (base, index, constant-offset) triple
/// by matching a single G_PTR_ADD. If \p Ptr is not a G_PTR_ADD, it is
/// returned as the base with offset 0.
BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                MachineRegisterInfo &MRI) {
  BaseIndexOffset Info;
  Register PtrAddRHS;
  Register BaseReg;
  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
    Info.setBase(Ptr);
    Info.setOffset(0);
    return Info;
  }
  Info.setBase(BaseReg);
  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
  if (RHSCst)
    Info.setOffset(RHSCst->Value.getSExtValue());

  // Just recognize a simple case for now. In future we'll need to match
  // indexing patterns for base + index + constant.
  Info.setIndex(PtrAddRHS);
  return Info;
}

/// Try to decide aliasing of two load/store instructions from their base
/// pointers and offsets alone. Returns true if a definitive answer was
/// computed (written to \p IsAlias), false if nothing could be proven.
bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
                                               const MachineInstr &MI2,
                                               bool &IsAlias,
                                               MachineRegisterInfo &MRI) {
  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
  if (!LdSt1 || !LdSt2)
    return false;

  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);

  if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
    return false;

  LocationSize Size1 = LdSt1->getMemSize();
  LocationSize Size2 = LdSt2->getMemSize();

  int64_t PtrDiff;
  if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
      BasePtr1.hasValidOffset()) {
    PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
    // If the size of memory access is unknown, do not use it to do analysis.
    // One example of unknown size memory access is to load/store scalable
    // vector objects on the stack.
    // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
    // following situations arise:
    if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) {
      // [----BasePtr0----]
      //                         [---BasePtr1--]
      // ========PtrDiff========>
      IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
      return true;
    }
    if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) {
      //                     [----BasePtr0----]
      // [---BasePtr1--]
      // =====(-PtrDiff)====>
      IsAlias = !((PtrDiff + (int64_t)Size2.getValue()) <= 0);
      return true;
    }
    return false;
  }

  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
  auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
  if (!Base0Def || !Base1Def)
    return false; // Couldn't tell anything.

  if (Base0Def->getOpcode() != Base1Def->getOpcode())
    return false;

  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
    // If the bases have the same frame index but we couldn't find a
    // constant offset, (indices are different) be conservative.
    if (Base0Def != Base1Def &&
        (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
         !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
      IsAlias = false;
      return true;
    }
  }

  // This implementation is a lot more primitive than the SDAG one for now.
  // FIXME: what about constant pools?
  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
    auto GV0 = Base0Def->getOperand(1).getGlobal();
    auto GV1 = Base1Def->getOperand(1).getGlobal();
    if (GV0 != GV1) {
      IsAlias = false;
      return true;
    }
  }

  // Can't tell anything about aliasing.
  return false;
}

/// Conservatively returns true if \p MI and \p Other may access overlapping
/// memory (or otherwise must not be reordered); returns false only when
/// no-alias can be proven, structurally or via \p AA.
bool GISelAddressing::instMayAlias(const MachineInstr &MI,
                                   const MachineInstr &Other,
                                   MachineRegisterInfo &MRI,
                                   AliasAnalysis *AA) {
  struct MemUseCharacteristics {
    bool IsVolatile;
    bool IsAtomic;
    Register BasePtr;
    int64_t Offset;
    LocationSize NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics =
      [&](const MachineInstr *MI) -> MemUseCharacteristics {
    if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
      Register BaseReg;
      int64_t Offset = 0;
      // No pre/post-inc addressing modes are considered here, unlike in SDAG.
      if (!mi_match(LS->getPointerReg(), MRI,
                    m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
        BaseReg = LS->getPointerReg();
        Offset = 0;
      }

      LocationSize Size = LS->getMMO().getSize();
      return {LS->isVolatile(), LS->isAtomic(), BaseReg,
              Offset /*base offset*/, Size, &LS->getMMO()};
    }
    // FIXME: support recognizing lifetime instructions.
    // Default.
    return {false /*isvolatile*/,
            /*isAtomic*/ false,
            Register(),
            (int64_t)0 /*offset*/,
            LocationSize::beforeOrAfterPointer() /*size*/,
            (MachineMemOperand *)nullptr};
  };
  MemUseCharacteristics MUC0 = getCharacteristics(&MI),
                        MUC1 = getCharacteristics(&Other);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // Be conservative about atomics for the moment
  // TODO: This is way overconservative for unordered atomics (see D66309)
  if (MUC0.IsAtomic && MUC1.IsAtomic)
    return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias.
  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // If NumBytes is scalable and offset is not 0, conservatively return may
  // alias
  if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) ||
      (MUC1.NumBytes.isScalable() && MUC1.Offset != 0))
    return true;

  const bool BothNotScalable =
      !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (BothNotScalable &&
      GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // FIXME: port the alignment based alias analysis from SDAG's isAlias().
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  LocationSize Size0 = MUC0.NumBytes;
  LocationSize Size1 = MUC1.NumBytes;
  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0.hasValue() &&
      Size1.hasValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 =
        Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
    int64_t Overlap1 =
        Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
    LocationSize Loc0 =
        Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
    LocationSize Loc1 =
        Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);

    if (AA->isNoAlias(
            MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
            MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Returns true if the instruction creates an unavoidable hazard that
/// forces a boundary between store merge candidates.
300dc84770dSAmara Emerson static bool isInstHardMergeHazard(MachineInstr &MI) { 301dc84770dSAmara Emerson return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef(); 302dc84770dSAmara Emerson } 303dc84770dSAmara Emerson 304dc84770dSAmara Emerson bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { 305dc84770dSAmara Emerson // Try to merge all the stores in the vector, splitting into separate segments 306dc84770dSAmara Emerson // as necessary. 307dc84770dSAmara Emerson assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge"); 308dc84770dSAmara Emerson LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg()); 309dc84770dSAmara Emerson LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg()); 310dc84770dSAmara Emerson unsigned AS = PtrTy.getAddressSpace(); 311dc84770dSAmara Emerson // Ensure the legal store info is computed for this address space. 312dc84770dSAmara Emerson initializeStoreMergeTargetInfo(AS); 313dc84770dSAmara Emerson const auto &LegalSizes = LegalStoreSizes[AS]; 314dc84770dSAmara Emerson 315dc84770dSAmara Emerson #ifndef NDEBUG 3169e6d1f4bSKazu Hirata for (auto *StoreMI : StoresToMerge) 317dc84770dSAmara Emerson assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); 318dc84770dSAmara Emerson #endif 319dc84770dSAmara Emerson 320dc84770dSAmara Emerson bool AnyMerged = false; 321dc84770dSAmara Emerson do { 322f20b5071SKazu Hirata unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size()); 3238fd5558bSGuillaume Chatelet unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue(); 324dc84770dSAmara Emerson // Compute the biggest store we can generate to handle the number of stores. 
325dc84770dSAmara Emerson unsigned MergeSizeBits; 326dc84770dSAmara Emerson for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) { 327dc84770dSAmara Emerson LLT StoreTy = LLT::scalar(MergeSizeBits); 328dc84770dSAmara Emerson EVT StoreEVT = 329*0d9fc174SCraig Topper getApproximateEVTForLLT(StoreTy, MF->getFunction().getContext()); 330dc84770dSAmara Emerson if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] && 331dc84770dSAmara Emerson TLI->canMergeStoresTo(AS, StoreEVT, *MF) && 332dc84770dSAmara Emerson (TLI->isTypeLegal(StoreEVT))) 333dc84770dSAmara Emerson break; // We can generate a MergeSize bits store. 334dc84770dSAmara Emerson } 335dc84770dSAmara Emerson if (MergeSizeBits <= OrigTy.getSizeInBits()) 336dc84770dSAmara Emerson return AnyMerged; // No greater merge. 337dc84770dSAmara Emerson 338dc84770dSAmara Emerson unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits(); 339dc84770dSAmara Emerson // Perform the actual merging. 340dc84770dSAmara Emerson SmallVector<GStore *, 8> SingleMergeStores( 341dc84770dSAmara Emerson StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge); 342dc84770dSAmara Emerson AnyMerged |= doSingleStoreMerge(SingleMergeStores); 343dc84770dSAmara Emerson StoresToMerge.erase(StoresToMerge.begin(), 344dc84770dSAmara Emerson StoresToMerge.begin() + NumStoresToMerge); 345dc84770dSAmara Emerson } while (StoresToMerge.size() > 1); 346dc84770dSAmara Emerson return AnyMerged; 347dc84770dSAmara Emerson } 348dc84770dSAmara Emerson 349dc84770dSAmara Emerson bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query, 350dc84770dSAmara Emerson MachineFunction &MF) const { 351dc84770dSAmara Emerson auto Action = LI->getAction(Query).Action; 352dc84770dSAmara Emerson // If the instruction is unsupported, it can't be legalized at all. 
353dc84770dSAmara Emerson if (Action == LegalizeActions::Unsupported) 354dc84770dSAmara Emerson return false; 355dc84770dSAmara Emerson return IsPreLegalizer || Action == LegalizeAction::Legal; 356dc84770dSAmara Emerson } 357dc84770dSAmara Emerson 358dc84770dSAmara Emerson bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { 359dc84770dSAmara Emerson assert(Stores.size() > 1); 360dc84770dSAmara Emerson // We know that all the stores are consecutive and there are no aliasing 361dc84770dSAmara Emerson // operations in the range. However, the values that are being stored may be 362dc84770dSAmara Emerson // generated anywhere before each store. To ensure we have the values 363dc84770dSAmara Emerson // available, we materialize the wide value and new store at the place of the 364dc84770dSAmara Emerson // final store in the merge sequence. 365dc84770dSAmara Emerson GStore *FirstStore = Stores[0]; 366dc84770dSAmara Emerson const unsigned NumStores = Stores.size(); 367dc84770dSAmara Emerson LLT SmallTy = MRI->getType(FirstStore->getValueReg()); 368dc84770dSAmara Emerson LLT WideValueTy = 3698fd5558bSGuillaume Chatelet LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue()); 370dc84770dSAmara Emerson 371dc84770dSAmara Emerson // For each store, compute pairwise merged debug locs. 37292ec6149SAnton Sidorenko DebugLoc MergedLoc = Stores.front()->getDebugLoc(); 37392ec6149SAnton Sidorenko for (auto *Store : drop_begin(Stores)) 37492ec6149SAnton Sidorenko MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc()); 37592ec6149SAnton Sidorenko 376dc84770dSAmara Emerson Builder.setInstr(*Stores.back()); 377dc84770dSAmara Emerson Builder.setDebugLoc(MergedLoc); 378dc84770dSAmara Emerson 379dc84770dSAmara Emerson // If all of the store values are constants, then create a wide constant 380dc84770dSAmara Emerson // directly. 
Otherwise, we need to generate some instructions to merge the 381dc84770dSAmara Emerson // existing values together into a wider type. 382dc84770dSAmara Emerson SmallVector<APInt, 8> ConstantVals; 3839e6d1f4bSKazu Hirata for (auto *Store : Stores) { 384dc84770dSAmara Emerson auto MaybeCst = 385dc84770dSAmara Emerson getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI); 386dc84770dSAmara Emerson if (!MaybeCst) { 387dc84770dSAmara Emerson ConstantVals.clear(); 388dc84770dSAmara Emerson break; 389dc84770dSAmara Emerson } 390dc84770dSAmara Emerson ConstantVals.emplace_back(MaybeCst->Value); 391dc84770dSAmara Emerson } 392dc84770dSAmara Emerson 3939a6817b7SFrederik Gossen Register WideReg; 3949a6817b7SFrederik Gossen auto *WideMMO = 3959a6817b7SFrederik Gossen MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy); 396dc84770dSAmara Emerson if (ConstantVals.empty()) { 397dc84770dSAmara Emerson // Mimic the SDAG behaviour here and don't try to do anything for unknown 398dc84770dSAmara Emerson // values. In future, we should also support the cases of loads and 399dc84770dSAmara Emerson // extracted vector elements. 400dc84770dSAmara Emerson return false; 401dc84770dSAmara Emerson } 402dc84770dSAmara Emerson 403dc84770dSAmara Emerson assert(ConstantVals.size() == NumStores); 404dc84770dSAmara Emerson // Check if our wide constant is legal. 405dc84770dSAmara Emerson if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF)) 406dc84770dSAmara Emerson return false; 407dc84770dSAmara Emerson APInt WideConst(WideValueTy.getSizeInBits(), 0); 408dc84770dSAmara Emerson for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) { 409dc84770dSAmara Emerson // Insert the smaller constant into the corresponding position in the 410dc84770dSAmara Emerson // wider one. 
411dc84770dSAmara Emerson WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits()); 412dc84770dSAmara Emerson } 4139a6817b7SFrederik Gossen WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0); 414ecfe7a34SFrederik Gossen auto NewStore = 415ecfe7a34SFrederik Gossen Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO); 4163f3d4e8aSFrederik Gossen (void) NewStore; 41766d64aacSAmara Emerson LLVM_DEBUG(dbgs() << "Merged " << Stores.size() 41866d64aacSAmara Emerson << " stores into merged store: " << *NewStore); 41966d64aacSAmara Emerson LLVM_DEBUG(for (auto *MI : Stores) dbgs() << " " << *MI;); 420dc84770dSAmara Emerson NumStoresMerged += Stores.size(); 421dc84770dSAmara Emerson 422dc84770dSAmara Emerson MachineOptimizationRemarkEmitter MORE(*MF, nullptr); 423dc84770dSAmara Emerson MORE.emit([&]() { 424dc84770dSAmara Emerson MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore", 425dc84770dSAmara Emerson FirstStore->getDebugLoc(), 426dc84770dSAmara Emerson FirstStore->getParent()); 427dc84770dSAmara Emerson R << "Merged " << NV("NumMerged", Stores.size()) << " stores of " 428dc84770dSAmara Emerson << NV("OrigWidth", SmallTy.getSizeInBytes()) 429dc84770dSAmara Emerson << " bytes into a single store of " 430dc84770dSAmara Emerson << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes"; 431dc84770dSAmara Emerson return R; 432dc84770dSAmara Emerson }); 433dc84770dSAmara Emerson 4349e6d1f4bSKazu Hirata for (auto *MI : Stores) 435dc84770dSAmara Emerson InstsToErase.insert(MI); 436dc84770dSAmara Emerson return true; 437dc84770dSAmara Emerson } 438dc84770dSAmara Emerson 439dc84770dSAmara Emerson bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) { 440dc84770dSAmara Emerson if (C.Stores.size() < 2) { 441dc84770dSAmara Emerson C.reset(); 442dc84770dSAmara Emerson return false; 443dc84770dSAmara Emerson } 444dc84770dSAmara Emerson 445dc84770dSAmara Emerson LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << 
C.Stores.size() 446dc84770dSAmara Emerson << " stores, starting with " << *C.Stores[0]); 447dc84770dSAmara Emerson // We know that the stores in the candidate are adjacent. 448dc84770dSAmara Emerson // Now we need to check if any potential aliasing instructions recorded 449dc84770dSAmara Emerson // during the search alias with load/stores added to the candidate after. 450dc84770dSAmara Emerson // For example, if we have the candidate: 451dc84770dSAmara Emerson // C.Stores = [ST1, ST2, ST3, ST4] 452dc84770dSAmara Emerson // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or 453dc84770dSAmara Emerson // ST2, then we would have recorded it into the PotentialAliases structure 454dc84770dSAmara Emerson // with the associated index value of "1". Then we see ST3 and ST4 and add 455dc84770dSAmara Emerson // them to the candidate group. We know that LD1 does not alias with ST1 or 456dc84770dSAmara Emerson // ST2, since we already did that check. However we don't yet know if it 457dc84770dSAmara Emerson // may alias ST3 and ST4, so we perform those checks now. 458dc84770dSAmara Emerson SmallVector<GStore *> StoresToMerge; 459dc84770dSAmara Emerson 460dc84770dSAmara Emerson auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) { 461dc84770dSAmara Emerson for (auto AliasInfo : reverse(C.PotentialAliases)) { 462dc84770dSAmara Emerson MachineInstr *PotentialAliasOp = AliasInfo.first; 463dc84770dSAmara Emerson unsigned PreCheckedIdx = AliasInfo.second; 46466d64aacSAmara Emerson if (static_cast<unsigned>(Idx) < PreCheckedIdx) { 46566d64aacSAmara Emerson // Once our store index is lower than the index associated with the 46666d64aacSAmara Emerson // potential alias, we know that we've already checked for this alias 46766d64aacSAmara Emerson // and all of the earlier potential aliases too. 46866d64aacSAmara Emerson return false; 46966d64aacSAmara Emerson } 470dc84770dSAmara Emerson // Need to check this alias. 
471dc84770dSAmara Emerson if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI, 472dc84770dSAmara Emerson AA)) { 473dc84770dSAmara Emerson LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp 474dc84770dSAmara Emerson << " detected\n"); 475dc84770dSAmara Emerson return true; 476dc84770dSAmara Emerson } 477dc84770dSAmara Emerson } 478dc84770dSAmara Emerson return false; 479dc84770dSAmara Emerson }; 480dc84770dSAmara Emerson // Start from the last store in the group, and check if it aliases with any 481dc84770dSAmara Emerson // of the potential aliasing operations in the list. 482dc84770dSAmara Emerson for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) { 483dc84770dSAmara Emerson auto *CheckStore = C.Stores[StoreIdx]; 484dc84770dSAmara Emerson if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore)) 485dc84770dSAmara Emerson continue; 486dc84770dSAmara Emerson StoresToMerge.emplace_back(CheckStore); 487dc84770dSAmara Emerson } 488dc84770dSAmara Emerson 489dc84770dSAmara Emerson LLVM_DEBUG(dbgs() << StoresToMerge.size() 490dc84770dSAmara Emerson << " stores remaining after alias checks. Merging...\n"); 491dc84770dSAmara Emerson 492dc84770dSAmara Emerson // Now we've checked for aliasing hazards, merge any stores left. 
493dc84770dSAmara Emerson C.reset(); 494dc84770dSAmara Emerson if (StoresToMerge.size() < 2) 495dc84770dSAmara Emerson return false; 496dc84770dSAmara Emerson return mergeStores(StoresToMerge); 497dc84770dSAmara Emerson } 498dc84770dSAmara Emerson 499dc84770dSAmara Emerson bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI, 500dc84770dSAmara Emerson StoreMergeCandidate &C) { 501dc84770dSAmara Emerson if (C.Stores.empty()) 502dc84770dSAmara Emerson return false; 503dc84770dSAmara Emerson return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) { 504dc84770dSAmara Emerson return instMayAlias(MI, *OtherMI, *MRI, AA); 505dc84770dSAmara Emerson }); 506dc84770dSAmara Emerson } 507dc84770dSAmara Emerson 508dc84770dSAmara Emerson void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) { 509dc84770dSAmara Emerson PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1)); 510dc84770dSAmara Emerson } 511dc84770dSAmara Emerson 512dc84770dSAmara Emerson bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI, 513dc84770dSAmara Emerson StoreMergeCandidate &C) { 514dc84770dSAmara Emerson // Check if the given store writes to an adjacent address, and other 515dc84770dSAmara Emerson // requirements. 516dc84770dSAmara Emerson LLT ValueTy = MRI->getType(StoreMI.getValueReg()); 517dc84770dSAmara Emerson LLT PtrTy = MRI->getType(StoreMI.getPointerReg()); 518dc84770dSAmara Emerson 519dc84770dSAmara Emerson // Only handle scalars. 520dc84770dSAmara Emerson if (!ValueTy.isScalar()) 521dc84770dSAmara Emerson return false; 522dc84770dSAmara Emerson 523dc84770dSAmara Emerson // Don't allow truncating stores for now. 524dc84770dSAmara Emerson if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits()) 525dc84770dSAmara Emerson return false; 526dc84770dSAmara Emerson 5278cbf18cbSAmara Emerson // Avoid adding volatile or ordered stores to the candidate. 
We already have a 5288cbf18cbSAmara Emerson // check for this in instMayAlias() but that only get's called later between 5298cbf18cbSAmara Emerson // potential aliasing hazards. 5308cbf18cbSAmara Emerson if (!StoreMI.isSimple()) 5318cbf18cbSAmara Emerson return false; 5328cbf18cbSAmara Emerson 533dc84770dSAmara Emerson Register StoreAddr = StoreMI.getPointerReg(); 534dc84770dSAmara Emerson auto BIO = getPointerInfo(StoreAddr, *MRI); 53519f4d682SAmara Emerson Register StoreBase = BIO.getBase(); 536dc84770dSAmara Emerson if (C.Stores.empty()) { 53719f4d682SAmara Emerson C.BasePtr = StoreBase; 53819f4d682SAmara Emerson if (!BIO.hasValidOffset()) { 53919f4d682SAmara Emerson C.CurrentLowestOffset = 0; 54019f4d682SAmara Emerson } else { 54119f4d682SAmara Emerson C.CurrentLowestOffset = BIO.getOffset(); 54219f4d682SAmara Emerson } 543dc84770dSAmara Emerson // This is the first store of the candidate. 544dc84770dSAmara Emerson // If the offset can't possibly allow for a lower addressed store with the 545dc84770dSAmara Emerson // same base, don't bother adding it. 54619f4d682SAmara Emerson if (BIO.hasValidOffset() && 54719f4d682SAmara Emerson BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes())) 548dc84770dSAmara Emerson return false; 549dc84770dSAmara Emerson C.Stores.emplace_back(&StoreMI); 550dc84770dSAmara Emerson LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: " 551dc84770dSAmara Emerson << StoreMI); 552dc84770dSAmara Emerson return true; 553dc84770dSAmara Emerson } 554dc84770dSAmara Emerson 555dc84770dSAmara Emerson // Check the store is the same size as the existing ones in the candidate. 
  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
      ValueTy.getSizeInBits())
    return false;

  // All stores in a candidate must target the same address space.
  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
      PtrTy.getAddressSpace())
    return false;

  // There are other stores in the candidate. Check that the store address
  // writes to the next lowest adjacent address.
  if (C.BasePtr != StoreBase)
    return false;
  // If we don't have a valid offset, we can't guarantee to be an adjacent
  // offset.
  if (!BIO.hasValidOffset())
    return false;
  // The candidate grows downward: this store must land exactly one value-width
  // below the current lowest offset.
  if ((C.CurrentLowestOffset -
       static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
    return false;

  // This writes to an adjacent address. Allow it.
  C.Stores.emplace_back(&StoreMI);
  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
  return true;
}

/// Scan \p MBB bottom-up, accumulating adjacent G_STOREs into a merge
/// candidate and flushing (processing) the candidate whenever an aliasing
/// hazard or hard merge hazard is reached. Returns true if any change was
/// made to the block.
bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
  bool Changed = false;
  // Walk through the block bottom-up, looking for merging candidates.
  StoreMergeCandidate Candidate;
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    // Skip instructions already scheduled for deletion by earlier merges.
    if (InstsToErase.contains(&MI))
      continue;

    if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
      // We have a G_STORE. Add it to the candidate if it writes to an adjacent
      // address.
      if (!addStoreToCandidate(*StoreMI, Candidate)) {
        // Store wasn't eligible to be added. May need to record it as a
        // potential alias.
        if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
          // It aliases the candidate: flush what we have before moving on.
          Changed |= processMergeCandidate(Candidate);
          continue;
        }
        Candidate.addPotentialAlias(*StoreMI);
      }
      continue;
    }

    // If we don't have any stores yet, this instruction can't pose a problem.
    if (Candidate.Stores.empty())
      continue;

    // We're dealing with some other kind of instruction.
    if (isInstHardMergeHazard(MI)) {
      // Hard hazards (e.g. calls) terminate the candidate unconditionally.
      Changed |= processMergeCandidate(Candidate);
      Candidate.Stores.clear();
      continue;
    }

    // Non-memory instructions can never alias the candidate's stores.
    if (!MI.mayLoadOrStore())
      continue;

    if (operationAliasesWithCandidate(MI, Candidate)) {
      // We have a potential alias, so process the current candidate if we can
      // and then continue looking for a new candidate.
      Changed |= processMergeCandidate(Candidate);
      continue;
    }

    // Record this instruction as a potential alias for future stores that are
    // added to the candidate.
    Candidate.addPotentialAlias(MI);
  }

  // Process any candidate left after finishing searching the entire block.
  Changed |= processMergeCandidate(Candidate);

  // Erase instructions now that we're no longer iterating over the block.
  for (auto *MI : InstsToErase)
    MI->eraseFromParent();
  InstsToErase.clear();
  return Changed;
}

/// Check if the store \p Store is a truncstore that can be merged. That is,
/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
/// Register then it does not need to match and SrcVal is set to the source
/// value found.
/// On match, returns the start byte offset of the \p SrcVal that is being
/// stored.
static std::optional<int64_t>
getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
                        MachineRegisterInfo &MRI) {
  // The stored value must come through a G_TRUNC; otherwise this is not a
  // truncstore pattern at all.
  Register TruncVal;
  if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
    return std::nullopt;

  // The shift amount must be a constant multiple of the narrow type.
  // It is translated to the offset address in the wide source value "y".
  //
  // x = G_LSHR y, ShiftAmtC
  // s8 z = G_TRUNC x
  // store z, ...
  Register FoundSrcVal;
  int64_t ShiftAmt;
  if (!mi_match(TruncVal, MRI,
                m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
                         m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
    // No shift: the truncated value is the wide source itself, i.e. byte
    // offset 0 — but only if it matches (or sets) the expected SrcVal.
    if (!SrcVal.isValid() || TruncVal == SrcVal) {
      if (!SrcVal.isValid())
        SrcVal = TruncVal;
      return 0; // If it's the lowest index store.
    }
    return std::nullopt;
  }

  // Reject shift amounts that don't align to a whole narrow element.
  unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
  if (ShiftAmt % NarrowBits != 0)
    return std::nullopt;
  const unsigned Offset = ShiftAmt / NarrowBits;

  // All participating stores must read from the same wide value.
  if (SrcVal.isValid() && FoundSrcVal != SrcVal)
    return std::nullopt;

  if (!SrcVal.isValid())
    SrcVal = FoundSrcVal;
  else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
    return std::nullopt;
  return Offset;
}

/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the targets
/// supports it.
///
/// Assuming little endian target:
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 0) & 0xFF;
/// p[1] = (val >> 8) & 0xFF;
/// p[2] = (val >> 16) & 0xFF;
/// p[3] = (val >> 24) & 0xFF;
/// =>
/// *((i32)p) = val;
///
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 24) & 0xFF;
/// p[1] = (val >> 16) & 0xFF;
/// p[2] = (val >> 8) & 0xFF;
/// p[3] = (val >> 0) & 0xFF;
/// =>
/// *((i32)p) = BSWAP(val);
bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
                                   SmallPtrSetImpl<GStore *> &DeletedStores) {
  LLT MemTy = StoreMI.getMMO().getMemoryType();

  // We only handle merging simple stores of 1-4 bytes.
  if (!MemTy.isScalar())
    return false;
  switch (MemTy.getSizeInBits()) {
  case 8:
  case 16:
  case 32:
    break;
  default:
    return false;
  }
  if (!StoreMI.isSimple())
    return false;

  // We do a simple search for mergeable stores prior to this one.
  // Any potential alias hazard along the way terminates the search.
  SmallVector<GStore *> FoundStores;

  // We're looking for:
  // 1) a (store(trunc(...)))
  // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
  //    the partial value stored.
  // 3) where the offsets form either a little or big-endian sequence.

  auto &LastStore = StoreMI;

  // The single base pointer that all stores must use.
  Register BaseReg;
  int64_t LastOffset;
  if (!mi_match(LastStore.getPointerReg(), *MRI,
                m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
    // Pointer isn't a constant-offset G_PTR_ADD: treat it as base + 0.
    BaseReg = LastStore.getPointerReg();
    LastOffset = 0;
  }

  // Track the store with the lowest memory address seen so far; the merged
  // wide store is emitted at its address.
  GStore *LowestIdxStore = &LastStore;
  int64_t LowestIdxOffset = LastOffset;

  Register WideSrcVal;
  auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
  if (!LowestShiftAmt)
    return false; // Didn't match a trunc.
  assert(WideSrcVal.isValid());

  LLT WideStoreTy = MRI->getType(WideSrcVal);
  // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
  if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
    return false;
  const unsigned NumStoresRequired =
      WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();

  // OffsetMap[byte index in the wide value] = memory offset of the store that
  // writes that byte; INT64_MAX marks "not yet seen".
  SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
  OffsetMap[*LowestShiftAmt] = LastOffset;
  FoundStores.emplace_back(&LastStore);

  // Walk upwards from the seed store, bailing after a bounded number of
  // non-matching instructions so compile time stays predictable.
  const int MaxInstsToCheck = 10;
  int NumInstsChecked = 0;
  for (auto II = ++LastStore.getReverseIterator();
       II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
       ++II) {
    NumInstsChecked++;
    GStore *NewStore;
    if ((NewStore = dyn_cast<GStore>(&*II))) {
      // Candidate stores must match the seed's memory type and be simple.
      if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
        break;
    } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
      // Any load or fold barrier is a potential alias hazard: stop searching.
      break;
    } else {
      continue; // This is a safe instruction we can look past.
    }

    Register NewBaseReg;
    int64_t MemOffset;
    // Check we're storing to the same base + some offset.
    if (!mi_match(NewStore->getPointerReg(), *MRI,
                  m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
      NewBaseReg = NewStore->getPointerReg();
      MemOffset = 0;
    }
    if (BaseReg != NewBaseReg)
      break;

    auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
    if (!ShiftByteOffset)
      break;
    if (MemOffset < LowestIdxOffset) {
      LowestIdxOffset = MemOffset;
      LowestIdxStore = NewStore;
    }

    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
        OffsetMap[*ShiftByteOffset] != INT64_MAX)
      break;
    OffsetMap[*ShiftByteOffset] = MemOffset;

    FoundStores.emplace_back(NewStore);
    // Reset counter since we've found a matching inst.
    NumInstsChecked = 0;
    if (FoundStores.size() == NumStoresRequired)
      break;
  }

  if (FoundStores.size() != NumStoresRequired) {
    // A single store is not a merge; need at least two.
    if (FoundStores.size() == 1)
      return false;
    // We didn't find enough stores to merge into the size of the original
    // source value, but we may be able to generate a smaller store if we
    // truncate the source value.
    WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
  }

  unsigned NumStoresFound = FoundStores.size();

  const auto &DL = LastStore.getMF()->getDataLayout();
  auto &C = LastStore.getMF()->getFunction().getContext();
  // Check that a store of the wide type is both allowed and fast on the target
  unsigned Fast = 0;
  bool Allowed = TLI->allowsMemoryAccess(
      C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
  if (!Allowed || !Fast)
    return false;

  // Check if the pieces of the value are going to the expected places in memory
  // to merge the stores.
  unsigned NarrowBits = MemTy.getScalarSizeInBits();
  auto checkOffsets = [&](bool MatchLittleEndian) {
    if (MatchLittleEndian) {
      for (unsigned i = 0; i != NumStoresFound; ++i)
        if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    } else { // MatchBigEndian by reversing loop counter.
      for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
           ++i, --j)
        if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
          return false;
    }
    return true;
  };

  // Check if the offsets line up for the native data layout of this target.
  bool NeedBswap = false;
  bool NeedRotate = false;
  if (!checkOffsets(DL.isLittleEndian())) {
    // Special-case: check if byte offsets line up for the opposite endian.
    // Byte-sized pieces in reverse order are a BSWAP; exactly two halves in
    // reverse order are a half-width rotate.
    if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
      NeedBswap = true;
    else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
      NeedRotate = true;
    else
      return false;
  }

  // Only emit the fixup ops if the target can handle (or legalize) them.
  if (NeedBswap &&
      !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
    return false;
  if (NeedRotate &&
      !isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
    return false;

  Builder.setInstrAndDebugLoc(StoreMI);

  // If we only matched a subset of the stores, narrow the source first.
  if (WideStoreTy != MRI->getType(WideSrcVal))
    WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);

  if (NeedBswap) {
    WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
  } else if (NeedRotate) {
    assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
           "Unexpected type for rotate");
    auto RotAmt =
        Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
    WideSrcVal =
        Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
  }

  // Emit the single wide store at the lowest address of the group.
  Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
                     LowestIdxStore->getMMO().getPointerInfo(),
                     LowestIdxStore->getMMO().getAlign());

89729c851f4SAmara Emerson // Erase the old stores. 89829c851f4SAmara Emerson for (auto *ST : FoundStores) { 89929c851f4SAmara Emerson ST->eraseFromParent(); 90029c851f4SAmara Emerson DeletedStores.insert(ST); 90129c851f4SAmara Emerson } 90229c851f4SAmara Emerson return true; 90329c851f4SAmara Emerson } 90429c851f4SAmara Emerson 90529c851f4SAmara Emerson bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) { 90629c851f4SAmara Emerson bool Changed = false; 90729c851f4SAmara Emerson SmallVector<GStore *, 16> Stores; 90829c851f4SAmara Emerson SmallPtrSet<GStore *, 8> DeletedStores; 90929c851f4SAmara Emerson // Walk up the block so we can see the most eligible stores. 91029c851f4SAmara Emerson for (MachineInstr &MI : llvm::reverse(BB)) 91129c851f4SAmara Emerson if (auto *StoreMI = dyn_cast<GStore>(&MI)) 91229c851f4SAmara Emerson Stores.emplace_back(StoreMI); 91329c851f4SAmara Emerson 91429c851f4SAmara Emerson for (auto *StoreMI : Stores) { 91529c851f4SAmara Emerson if (DeletedStores.count(StoreMI)) 91629c851f4SAmara Emerson continue; 91729c851f4SAmara Emerson if (mergeTruncStore(*StoreMI, DeletedStores)) 91829c851f4SAmara Emerson Changed = true; 91929c851f4SAmara Emerson } 92029c851f4SAmara Emerson return Changed; 92129c851f4SAmara Emerson } 92229c851f4SAmara Emerson 923dc84770dSAmara Emerson bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) { 924dc84770dSAmara Emerson bool Changed = false; 925dc84770dSAmara Emerson for (auto &BB : MF){ 926dc84770dSAmara Emerson Changed |= mergeBlockStores(BB); 92729c851f4SAmara Emerson Changed |= mergeTruncStoresBlock(BB); 928dc84770dSAmara Emerson } 92929c851f4SAmara Emerson 93029c851f4SAmara Emerson // Erase all dead instructions left over by the merging. 
  if (Changed) {
    for (auto &BB : MF) {
      // Bottom-up with early-increment so erasing an instruction can expose
      // its (earlier) operands as dead in the same sweep.
      for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
        if (isTriviallyDead(I, *MRI))
          I.eraseFromParent();
      }
    }
  }

  return Changed;
}

/// Populate LegalStoreSizes[AddrSpace] with the power-of-two scalar store
/// sizes the target's LegalizerInfo reports as Legal, so merging never forms
/// a store that would immediately be split again. Results are cached per
/// address space.
void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
  // Query the legalizer info to record what store types are legal.
  // We record this because we don't want to bother trying to merge stores into
  // illegal ones, which would just result in being split again.

  if (LegalStoreSizes.count(AddrSpace)) {
    assert(LegalStoreSizes[AddrSpace].any());
    return; // Already cached sizes for this address space.
  }

  // Need to reserve at least MaxStoreSizeToForm + 1 bits.
  BitVector LegalSizes(MaxStoreSizeToForm * 2);
  const auto &LI = *MF->getSubtarget().getLegalizerInfo();
  const auto &DL = MF->getFunction().getDataLayout();
  Type *IRPtrTy = PointerType::get(MF->getFunction().getContext(), AddrSpace);
  LLT PtrTy = getLLTForType(*IRPtrTy, DL);
  // We assume that we're not going to be generating any stores wider than
  // MaxStoreSizeToForm bits for now.
  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
    LLT Ty = LLT::scalar(Size);
    // Probe G_STORE of an s<Size> value through a pointer in this address
    // space, non-atomic, and record the size if the action is Legal.
    SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
        {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
    SmallVector<LLT> StoreTys({Ty, PtrTy});
    LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
    LegalizeActionStep ActionStep = LI.getAction(Q);
    if (ActionStep.Action == LegalizeActions::Legal)
      LegalSizes.set(Size);
  }
  assert(LegalSizes.any() && "Expected some store sizes to be legal!");
  LegalStoreSizes[AddrSpace] = LegalSizes;
}

/// Pass entry point: run the store-merging optimizations over \p MF.
// NOTE(review): the DoNotRunPass predicate passed to the constructor is not
// consulted here — confirm whether it is intentionally unused in this pass.
bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
  // If the ISel pipeline failed, do not bother running that pass.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;

  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
                    << '\n');

  init(MF);
  bool Changed = false;
  Changed |= mergeFunctionStores(MF);

  // Drop the per-address-space legality cache; it is rebuilt on demand for
  // the next function.
  LegalStoreSizes.clear();
  return Changed;
}