1 //===- SeedCollector.cpp -0000000-----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" 10 #include "llvm/Analysis/LoopAccessAnalysis.h" 11 #include "llvm/Analysis/ValueTracking.h" 12 #include "llvm/IR/Type.h" 13 #include "llvm/SandboxIR/Instruction.h" 14 #include "llvm/SandboxIR/Utils.h" 15 #include "llvm/Support/Debug.h" 16 17 using namespace llvm; 18 namespace llvm::sandboxir { 19 20 cl::opt<unsigned> SeedBundleSizeLimit( 21 "sbvec-seed-bundle-size-limit", cl::init(32), cl::Hidden, 22 cl::desc("Limit the size of the seed bundle to cap compilation time.")); 23 #define LoadSeedsDef "loads" 24 #define StoreSeedsDef "stores" 25 cl::opt<std::string> CollectSeeds( 26 "sbvec-collect-seeds", cl::init(LoadSeedsDef "," StoreSeedsDef), cl::Hidden, 27 cl::desc("Collect these seeds. Use empty for none or a comma-separated " 28 "list of '" LoadSeedsDef "' and '" StoreSeedsDef "'.")); 29 cl::opt<unsigned> SeedGroupsLimit( 30 "sbvec-seed-groups-limit", cl::init(256), cl::Hidden, 31 cl::desc("Limit the number of collected seeds groups in a BB to " 32 "cap compilation time.")); 33 34 MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx, 35 unsigned MaxVecRegBits, 36 bool ForcePowerOf2) { 37 // Use uint32_t here for compatibility with IsPowerOf2_32 38 39 // BitCount tracks the size of the working slice. From that we can tell 40 // when the working slice's size is a power-of-two and when it exceeds 41 // the legal size in MaxVecBits. 42 uint32_t BitCount = 0; 43 uint32_t NumElements = 0; 44 // Tracks the most recent slice where NumElements gave a power-of-2 BitCount 45 uint32_t NumElementsPowerOfTwo = 0; 46 uint32_t BitCountPowerOfTwo = 0; 47 // Can't start a slice with a used instruction. 48 assert(!isUsed(StartIdx) && "Expected unused at StartIdx"); 49 for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) { 50 uint32_t InstBits = Utils::getNumBits(S); 51 // Stop if this instruction is used, or if adding it puts the slice over 52 // the limit. 53 if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits) 54 break; 55 NumElements++; 56 BitCount += InstBits; 57 if (ForcePowerOf2 && isPowerOf2_32(BitCount)) { 58 NumElementsPowerOfTwo = NumElements; 59 BitCountPowerOfTwo = BitCount; 60 } 61 } 62 if (ForcePowerOf2) { 63 NumElements = NumElementsPowerOfTwo; 64 BitCount = BitCountPowerOfTwo; 65 } 66 67 assert((!ForcePowerOf2 || isPowerOf2_32(BitCount)) && 68 "Must be a power of two"); 69 // Return any non-empty slice 70 if (NumElements > 1) 71 return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements); 72 else 73 return {}; 74 } 75 76 template <typename LoadOrStoreT> 77 SeedContainer::KeyT SeedContainer::getKey(LoadOrStoreT *LSI) const { 78 assert((isa<LoadInst>(LSI) || isa<StoreInst>(LSI)) && 79 "Expected Load or Store!"); 80 Value *Ptr = Utils::getMemInstructionBase(LSI); 81 Instruction::Opcode Op = LSI->getOpcode(); 82 Type *Ty = Utils::getExpectedType(LSI); 83 if (auto *VTy = dyn_cast<VectorType>(Ty)) 84 Ty = VTy->getElementType(); 85 return {Ptr, Ty, Op}; 86 } 87 88 // Explicit instantiations 89 template SeedContainer::KeyT 90 SeedContainer::getKey<LoadInst>(LoadInst *LSI) const; 91 template SeedContainer::KeyT 92 SeedContainer::getKey<StoreInst>(StoreInst *LSI) const; 93 94 bool SeedContainer::erase(Instruction *I) { 95 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store!"); 96 auto It = SeedLookupMap.find(I); 97 if (It == SeedLookupMap.end()) 98 return false; 99 SeedBundle *Bndl = It->second; 100 Bndl->setUsed(I); 101 return true; 102 } 103 104 template <typename LoadOrStoreT> void SeedContainer::insert(LoadOrStoreT *LSI) { 105 // Find the bundle containing seeds for this symbol and type-of-access. 106 auto &BundleVec = Bundles[getKey(LSI)]; 107 // Fill this vector of bundles front to back so that only the last bundle in 108 // the vector may have available space. This avoids iteration to find one with 109 // space. 110 if (BundleVec.empty() || BundleVec.back()->size() == SeedBundleSizeLimit) 111 BundleVec.emplace_back(std::make_unique<MemSeedBundle<LoadOrStoreT>>(LSI)); 112 else 113 BundleVec.back()->insert(LSI, SE); 114 115 SeedLookupMap[LSI] = BundleVec.back().get(); 116 } 117 118 // Explicit instantiations 119 template void SeedContainer::insert<LoadInst>(LoadInst *); 120 template void SeedContainer::insert<StoreInst>(StoreInst *); 121 122 #ifndef NDEBUG 123 void SeedContainer::print(raw_ostream &OS) const { 124 for (const auto &Pair : Bundles) { 125 auto [I, Ty, Opc] = Pair.first; 126 const auto &SeedsVec = Pair.second; 127 std::string RefType = dyn_cast<LoadInst>(I) ? "Load" 128 : dyn_cast<StoreInst>(I) ? "Store" 129 : "Other"; 130 OS << "[Inst=" << *I << " Ty=" << Ty << " " << RefType << "]\n"; 131 for (const auto &SeedPtr : SeedsVec) { 132 SeedPtr->dump(OS); 133 OS << "\n"; 134 } 135 } 136 OS << "\n"; 137 } 138 139 LLVM_DUMP_METHOD void SeedContainer::dump() const { print(dbgs()); } 140 #endif // NDEBUG 141 142 template <typename LoadOrStoreT> static bool isValidMemSeed(LoadOrStoreT *LSI) { 143 if (!LSI->isSimple()) 144 return false; 145 auto *Ty = Utils::getExpectedType(LSI); 146 // Omit types that are architecturally unvectorizable 147 if (Ty->isX86_FP80Ty() || Ty->isPPC_FP128Ty()) 148 return false; 149 // Omit vector types without compile-time-known lane counts 150 if (isa<ScalableVectorType>(Ty)) 151 return false; 152 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) 153 return VectorType::isValidElementType(VTy->getElementType()); 154 return VectorType::isValidElementType(Ty); 155 } 156 157 template bool isValidMemSeed<LoadInst>(LoadInst *LSI); 158 template bool isValidMemSeed<StoreInst>(StoreInst *LSI); 159 160 SeedCollector::SeedCollector(BasicBlock *BB, ScalarEvolution &SE) 161 : StoreSeeds(SE), LoadSeeds(SE), Ctx(BB->getContext()) { 162 163 bool CollectStores = CollectSeeds.find(StoreSeedsDef) != std::string::npos; 164 bool CollectLoads = CollectSeeds.find(LoadSeedsDef) != std::string::npos; 165 if (!CollectStores && !CollectLoads) 166 return; 167 168 EraseCallbackID = Ctx.registerEraseInstrCallback([this](Instruction *I) { 169 if (auto SI = dyn_cast<StoreInst>(I)) 170 StoreSeeds.erase(SI); 171 else if (auto LI = dyn_cast<LoadInst>(I)) 172 LoadSeeds.erase(LI); 173 }); 174 175 // Actually collect the seeds. 176 for (auto &I : *BB) { 177 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) 178 if (CollectStores && isValidMemSeed(SI)) 179 StoreSeeds.insert(SI); 180 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) 181 if (CollectLoads && isValidMemSeed(LI)) 182 LoadSeeds.insert(LI); 183 // Cap compilation time. 184 if (totalNumSeedGroups() > SeedGroupsLimit) 185 break; 186 } 187 } 188 189 SeedCollector::~SeedCollector() { 190 Ctx.unregisterEraseInstrCallback(EraseCallbackID); 191 } 192 193 #ifndef NDEBUG 194 void SeedCollector::print(raw_ostream &OS) const { 195 OS << "=== StoreSeeds ===\n"; 196 StoreSeeds.print(OS); 197 OS << "=== LoadSeeds ===\n"; 198 LoadSeeds.print(OS); 199 } 200 201 void SeedCollector::dump() const { print(dbgs()); } 202 #endif 203 204 } // namespace llvm::sandboxir 205