1 //===- SeedCollector.cpp -0000000-----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" 10 #include "llvm/ADT/STLExtras.h" 11 #include "llvm/Analysis/LoopAccessAnalysis.h" 12 #include "llvm/Analysis/ValueTracking.h" 13 #include "llvm/IR/BasicBlock.h" 14 #include "llvm/IR/Type.h" 15 #include "llvm/SandboxIR/Instruction.h" 16 #include "llvm/SandboxIR/Utils.h" 17 #include "llvm/Support/Debug.h" 18 19 using namespace llvm; 20 namespace llvm::sandboxir { 21 22 cl::opt<unsigned> SeedBundleSizeLimit( 23 "sbvec-seed-bundle-size-limit", cl::init(32), cl::Hidden, 24 cl::desc("Limit the size of the seed bundle to cap compilation time.")); 25 #define LoadSeedsDef "loads" 26 #define StoreSeedsDef "stores" 27 cl::opt<std::string> CollectSeeds( 28 "sbvec-collect-seeds", cl::init(LoadSeedsDef "," StoreSeedsDef), cl::Hidden, 29 cl::desc("Collect these seeds. Use empty for none or a comma-separated " 30 "list of '" LoadSeedsDef "' and '" StoreSeedsDef "'.")); 31 cl::opt<unsigned> SeedGroupsLimit( 32 "sbvec-seed-groups-limit", cl::init(256), cl::Hidden, 33 cl::desc("Limit the number of collected seeds groups in a BB to " 34 "cap compilation time.")); 35 36 MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx, 37 unsigned MaxVecRegBits, 38 bool ForcePowerOf2) { 39 // Use uint32_t here for compatibility with IsPowerOf2_32 40 41 // BitCount tracks the size of the working slice. From that we can tell 42 // when the working slice's size is a power-of-two and when it exceeds 43 // the legal size in MaxVecBits. 44 uint32_t BitCount = 0; 45 uint32_t NumElements = 0; 46 // Tracks the most recent slice where NumElements gave a power-of-2 BitCount 47 uint32_t NumElementsPowerOfTwo = 0; 48 uint32_t BitCountPowerOfTwo = 0; 49 // Can't start a slice with a used instruction. 50 assert(!isUsed(StartIdx) && "Expected unused at StartIdx"); 51 for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) { 52 uint32_t InstBits = Utils::getNumBits(S); 53 // Stop if this instruction is used, or if adding it puts the slice over 54 // the limit. 55 if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits) 56 break; 57 NumElements++; 58 BitCount += InstBits; 59 if (ForcePowerOf2 && isPowerOf2_32(BitCount)) { 60 NumElementsPowerOfTwo = NumElements; 61 BitCountPowerOfTwo = BitCount; 62 } 63 } 64 if (ForcePowerOf2) { 65 NumElements = NumElementsPowerOfTwo; 66 BitCount = BitCountPowerOfTwo; 67 } 68 69 assert((!ForcePowerOf2 || isPowerOf2_32(BitCount)) && 70 "Must be a power of two"); 71 // Return any non-empty slice 72 if (NumElements > 1) 73 return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements); 74 else 75 return {}; 76 } 77 78 template <typename LoadOrStoreT> 79 SeedContainer::KeyT SeedContainer::getKey(LoadOrStoreT *LSI) const { 80 assert((isa<LoadInst>(LSI) || isa<StoreInst>(LSI)) && 81 "Expected Load or Store!"); 82 Value *Ptr = Utils::getMemInstructionBase(LSI); 83 Instruction::Opcode Op = LSI->getOpcode(); 84 Type *Ty = Utils::getExpectedType(LSI); 85 if (auto *VTy = dyn_cast<VectorType>(Ty)) 86 Ty = VTy->getElementType(); 87 return {Ptr, Ty, Op}; 88 } 89 90 // Explicit instantiations 91 template SeedContainer::KeyT 92 SeedContainer::getKey<LoadInst>(LoadInst *LSI) const; 93 template SeedContainer::KeyT 94 SeedContainer::getKey<StoreInst>(StoreInst *LSI) const; 95 96 bool SeedContainer::erase(Instruction *I) { 97 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Expected Load or Store!"); 98 auto It = SeedLookupMap.find(I); 99 if (It == SeedLookupMap.end()) 100 return false; 101 SeedBundle *Bndl = It->second; 102 Bndl->setUsed(I); 103 return true; 104 } 105 106 template <typename LoadOrStoreT> void SeedContainer::insert(LoadOrStoreT *LSI) { 107 // Find the bundle containing seeds for this symbol and type-of-access. 108 auto &BundleVec = Bundles[getKey(LSI)]; 109 // Fill this vector of bundles front to back so that only the last bundle in 110 // the vector may have available space. This avoids iteration to find one with 111 // space. 112 if (BundleVec.empty() || BundleVec.back()->size() == SeedBundleSizeLimit) 113 BundleVec.emplace_back(std::make_unique<MemSeedBundle<LoadOrStoreT>>(LSI)); 114 else 115 BundleVec.back()->insert(LSI, SE); 116 117 SeedLookupMap[LSI] = BundleVec.back().get(); 118 } 119 120 // Explicit instantiations 121 template void SeedContainer::insert<LoadInst>(LoadInst *); 122 template void SeedContainer::insert<StoreInst>(StoreInst *); 123 124 #ifndef NDEBUG 125 void SeedContainer::print(raw_ostream &OS) const { 126 for (const auto &Pair : Bundles) { 127 auto [I, Ty, Opc] = Pair.first; 128 const auto &SeedsVec = Pair.second; 129 std::string RefType = dyn_cast<LoadInst>(I) ? "Load" 130 : dyn_cast<StoreInst>(I) ? "Store" 131 : "Other"; 132 OS << "[Inst=" << *I << " Ty=" << Ty << " " << RefType << "]\n"; 133 for (const auto &SeedPtr : SeedsVec) { 134 SeedPtr->dump(OS); 135 OS << "\n"; 136 } 137 } 138 OS << "\n"; 139 } 140 141 LLVM_DUMP_METHOD void SeedContainer::dump() const { print(dbgs()); } 142 #endif // NDEBUG 143 144 template <typename LoadOrStoreT> static bool isValidMemSeed(LoadOrStoreT *LSI) { 145 if (LSI->isSimple()) 146 return true; 147 auto *Ty = Utils::getExpectedType(LSI); 148 // Omit types that are architecturally unvectorizable 149 if (Ty->isX86_FP80Ty() || Ty->isPPC_FP128Ty()) 150 return false; 151 // Omit vector types without compile-time-known lane counts 152 if (isa<ScalableVectorType>(Ty)) 153 return false; 154 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) 155 return VectorType::isValidElementType(VTy->getElementType()); 156 return VectorType::isValidElementType(Ty); 157 } 158 159 template bool isValidMemSeed<LoadInst>(LoadInst *LSI); 160 template bool isValidMemSeed<StoreInst>(StoreInst *LSI); 161 162 SeedCollector::SeedCollector(BasicBlock *BB, ScalarEvolution &SE) 163 : StoreSeeds(SE), LoadSeeds(SE), Ctx(BB->getContext()) { 164 // TODO: Register a callback for updating the Collector data structures upon 165 // instr removal 166 167 bool CollectStores = CollectSeeds.find(StoreSeedsDef) != std::string::npos; 168 bool CollectLoads = CollectSeeds.find(LoadSeedsDef) != std::string::npos; 169 if (!CollectStores && !CollectLoads) 170 return; 171 // Actually collect the seeds. 172 for (auto &I : *BB) { 173 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) 174 if (CollectStores && isValidMemSeed(SI)) 175 StoreSeeds.insert(SI); 176 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) 177 if (CollectLoads && isValidMemSeed(LI)) 178 LoadSeeds.insert(LI); 179 // Cap compilation time. 180 if (totalNumSeedGroups() > SeedGroupsLimit) 181 break; 182 } 183 } 184 185 SeedCollector::~SeedCollector() { 186 // TODO: Unregister the callback for updating the seed datastructures upon 187 // instr removal 188 } 189 190 #ifndef NDEBUG 191 void SeedCollector::print(raw_ostream &OS) const { 192 OS << "=== StoreSeeds ===\n"; 193 StoreSeeds.print(OS); 194 OS << "=== LoadSeeds ===\n"; 195 LoadSeeds.print(OS); 196 } 197 198 void SeedCollector::dump() const { print(dbgs()); } 199 #endif 200 201 } // namespace llvm::sandboxir 202