15ffd83dbSDimitry Andric //===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===// 25ffd83dbSDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // Performs general IR level optimizations on SVE intrinsics. 105ffd83dbSDimitry Andric // 11fe6060f1SDimitry Andric // This pass performs the following optimizations: 125ffd83dbSDimitry Andric // 13fe6060f1SDimitry Andric // - removes unnecessary ptrue intrinsics (llvm.aarch64.sve.ptrue), e.g: 14fe6060f1SDimitry Andric // %1 = @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) 15fe6060f1SDimitry Andric // %2 = @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) 16fe6060f1SDimitry Andric // ; (%1 can be replaced with a reinterpret of %2) 175ffd83dbSDimitry Andric // 18fe6060f1SDimitry Andric // - optimizes ptest intrinsics where the operands are being needlessly 19fe6060f1SDimitry Andric // converted to and from svbool_t. 205ffd83dbSDimitry Andric // 215ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 225ffd83dbSDimitry Andric 23fe6060f1SDimitry Andric #include "AArch64.h" 245ffd83dbSDimitry Andric #include "Utils/AArch64BaseInfo.h" 255ffd83dbSDimitry Andric #include "llvm/ADT/PostOrderIterator.h" 265ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h" 275ffd83dbSDimitry Andric #include "llvm/IR/Constants.h" 285ffd83dbSDimitry Andric #include "llvm/IR/Dominators.h" 295ffd83dbSDimitry Andric #include "llvm/IR/IRBuilder.h" 305ffd83dbSDimitry Andric #include "llvm/IR/Instructions.h" 315ffd83dbSDimitry Andric #include "llvm/IR/IntrinsicInst.h" 325ffd83dbSDimitry Andric #include "llvm/IR/IntrinsicsAArch64.h" 335ffd83dbSDimitry Andric #include "llvm/IR/LLVMContext.h" 34*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h" 355ffd83dbSDimitry Andric #include "llvm/IR/PatternMatch.h" 365ffd83dbSDimitry Andric #include "llvm/InitializePasses.h" 375ffd83dbSDimitry Andric #include "llvm/Support/Debug.h" 38bdd1243dSDimitry Andric #include <optional> 395ffd83dbSDimitry Andric 405ffd83dbSDimitry Andric using namespace llvm; 415ffd83dbSDimitry Andric using namespace llvm::PatternMatch; 425ffd83dbSDimitry Andric 43e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-sve-intrinsic-opts" 445ffd83dbSDimitry Andric 455ffd83dbSDimitry Andric namespace { 465ffd83dbSDimitry Andric struct SVEIntrinsicOpts : public ModulePass { 475ffd83dbSDimitry Andric static char ID; // Pass identification, replacement for typeid 485ffd83dbSDimitry Andric SVEIntrinsicOpts() : ModulePass(ID) { 495ffd83dbSDimitry Andric initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry()); 505ffd83dbSDimitry Andric } 515ffd83dbSDimitry Andric 525ffd83dbSDimitry Andric bool runOnModule(Module &M) override; 535ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 545ffd83dbSDimitry Andric 555ffd83dbSDimitry Andric private: 56fe6060f1SDimitry Andric bool coalescePTrueIntrinsicCalls(BasicBlock &BB, 57fe6060f1SDimitry Andric SmallSetVector<IntrinsicInst *, 4> &PTrues); 58fe6060f1SDimitry Andric bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions); 59349cc55cSDimitry Andric bool optimizePredicateStore(Instruction *I); 60349cc55cSDimitry Andric bool optimizePredicateLoad(Instruction *I); 61349cc55cSDimitry Andric 62349cc55cSDimitry Andric bool optimizeInstructions(SmallSetVector<Function *, 4> &Functions); 635ffd83dbSDimitry Andric 64fe6060f1SDimitry Andric /// Operates at the function-scope. I.e., optimizations are applied local to 65fe6060f1SDimitry Andric /// the functions themselves. 665ffd83dbSDimitry Andric bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions); 675ffd83dbSDimitry Andric }; 685ffd83dbSDimitry Andric } // end anonymous namespace 695ffd83dbSDimitry Andric 705ffd83dbSDimitry Andric void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const { 715ffd83dbSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 725ffd83dbSDimitry Andric AU.setPreservesCFG(); 735ffd83dbSDimitry Andric } 745ffd83dbSDimitry Andric 755ffd83dbSDimitry Andric char SVEIntrinsicOpts::ID = 0; 765ffd83dbSDimitry Andric static const char *name = "SVE intrinsics optimizations"; 775ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 785ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 795ffd83dbSDimitry Andric INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 805ffd83dbSDimitry Andric 81fe6060f1SDimitry Andric ModulePass *llvm::createSVEIntrinsicOptsPass() { 82fe6060f1SDimitry Andric return new SVEIntrinsicOpts(); 835ffd83dbSDimitry Andric } 845ffd83dbSDimitry Andric 85fe6060f1SDimitry Andric /// Checks if a ptrue intrinsic call is promoted. The act of promoting a 86fe6060f1SDimitry Andric /// ptrue will introduce zeroing. For example: 87fe6060f1SDimitry Andric /// 88fe6060f1SDimitry Andric /// %1 = <vscale x 4 x i1> call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) 89fe6060f1SDimitry Andric /// %2 = <vscale x 16 x i1> call @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1) 90fe6060f1SDimitry Andric /// %3 = <vscale x 8 x i1> call @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2) 91fe6060f1SDimitry Andric /// 92fe6060f1SDimitry Andric /// %1 is promoted, because it is converted: 93fe6060f1SDimitry Andric /// 94fe6060f1SDimitry Andric /// <vscale x 4 x i1> => <vscale x 16 x i1> => <vscale x 8 x i1> 95fe6060f1SDimitry Andric /// 96fe6060f1SDimitry Andric /// via a sequence of the SVE reinterpret intrinsics convert.{to,from}.svbool. 97fe6060f1SDimitry Andric static bool isPTruePromoted(IntrinsicInst *PTrue) { 98fe6060f1SDimitry Andric // Find all users of this intrinsic that are calls to convert-to-svbool 99fe6060f1SDimitry Andric // reinterpret intrinsics. 100fe6060f1SDimitry Andric SmallVector<IntrinsicInst *, 4> ConvertToUses; 101fe6060f1SDimitry Andric for (User *User : PTrue->users()) { 102fe6060f1SDimitry Andric if (match(User, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>())) { 103fe6060f1SDimitry Andric ConvertToUses.push_back(cast<IntrinsicInst>(User)); 104fe6060f1SDimitry Andric } 105fe6060f1SDimitry Andric } 1065ffd83dbSDimitry Andric 107fe6060f1SDimitry Andric // If no such calls were found, this is ptrue is not promoted. 108fe6060f1SDimitry Andric if (ConvertToUses.empty()) 1095ffd83dbSDimitry Andric return false; 1105ffd83dbSDimitry Andric 111fe6060f1SDimitry Andric // Otherwise, try to find users of the convert-to-svbool intrinsics that are 112fe6060f1SDimitry Andric // calls to the convert-from-svbool intrinsic, and would result in some lanes 113fe6060f1SDimitry Andric // being zeroed. 114fe6060f1SDimitry Andric const auto *PTrueVTy = cast<ScalableVectorType>(PTrue->getType()); 115fe6060f1SDimitry Andric for (IntrinsicInst *ConvertToUse : ConvertToUses) { 116fe6060f1SDimitry Andric for (User *User : ConvertToUse->users()) { 117fe6060f1SDimitry Andric auto *IntrUser = dyn_cast<IntrinsicInst>(User); 118fe6060f1SDimitry Andric if (IntrUser && IntrUser->getIntrinsicID() == 119fe6060f1SDimitry Andric Intrinsic::aarch64_sve_convert_from_svbool) { 120fe6060f1SDimitry Andric const auto *IntrUserVTy = cast<ScalableVectorType>(IntrUser->getType()); 121fe6060f1SDimitry Andric 122fe6060f1SDimitry Andric // Would some lanes become zeroed by the conversion? 123fe6060f1SDimitry Andric if (IntrUserVTy->getElementCount().getKnownMinValue() > 124fe6060f1SDimitry Andric PTrueVTy->getElementCount().getKnownMinValue()) 125fe6060f1SDimitry Andric // This is a promoted ptrue. 126fe6060f1SDimitry Andric return true; 127fe6060f1SDimitry Andric } 128fe6060f1SDimitry Andric } 129fe6060f1SDimitry Andric } 130fe6060f1SDimitry Andric 131fe6060f1SDimitry Andric // If no matching calls were found, this is not a promoted ptrue. 1325ffd83dbSDimitry Andric return false; 1335ffd83dbSDimitry Andric } 1345ffd83dbSDimitry Andric 135fe6060f1SDimitry Andric /// Attempts to coalesce ptrues in a basic block. 136fe6060f1SDimitry Andric bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls( 137fe6060f1SDimitry Andric BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) { 138fe6060f1SDimitry Andric if (PTrues.size() <= 1) 139fe6060f1SDimitry Andric return false; 140fe6060f1SDimitry Andric 141fe6060f1SDimitry Andric // Find the ptrue with the most lanes. 142*0fca6ea1SDimitry Andric auto *MostEncompassingPTrue = 143*0fca6ea1SDimitry Andric *llvm::max_element(PTrues, [](auto *PTrue1, auto *PTrue2) { 144fe6060f1SDimitry Andric auto *PTrue1VTy = cast<ScalableVectorType>(PTrue1->getType()); 145fe6060f1SDimitry Andric auto *PTrue2VTy = cast<ScalableVectorType>(PTrue2->getType()); 146fe6060f1SDimitry Andric return PTrue1VTy->getElementCount().getKnownMinValue() < 147fe6060f1SDimitry Andric PTrue2VTy->getElementCount().getKnownMinValue(); 148fe6060f1SDimitry Andric }); 149fe6060f1SDimitry Andric 150fe6060f1SDimitry Andric // Remove the most encompassing ptrue, as well as any promoted ptrues, leaving 151fe6060f1SDimitry Andric // behind only the ptrues to be coalesced. 152fe6060f1SDimitry Andric PTrues.remove(MostEncompassingPTrue); 1530eae32dcSDimitry Andric PTrues.remove_if(isPTruePromoted); 154fe6060f1SDimitry Andric 155fe6060f1SDimitry Andric // Hoist MostEncompassingPTrue to the start of the basic block. It is always 156fe6060f1SDimitry Andric // safe to do this, since ptrue intrinsic calls are guaranteed to have no 157fe6060f1SDimitry Andric // predecessors. 158fe6060f1SDimitry Andric MostEncompassingPTrue->moveBefore(BB, BB.getFirstInsertionPt()); 159fe6060f1SDimitry Andric 160fe6060f1SDimitry Andric LLVMContext &Ctx = BB.getContext(); 1615ffd83dbSDimitry Andric IRBuilder<> Builder(Ctx); 162fe6060f1SDimitry Andric Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator()); 1635ffd83dbSDimitry Andric 164fe6060f1SDimitry Andric auto *MostEncompassingPTrueVTy = 165fe6060f1SDimitry Andric cast<VectorType>(MostEncompassingPTrue->getType()); 166fe6060f1SDimitry Andric auto *ConvertToSVBool = Builder.CreateIntrinsic( 167fe6060f1SDimitry Andric Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy}, 168fe6060f1SDimitry Andric {MostEncompassingPTrue}); 169fe6060f1SDimitry Andric 170fe6060f1SDimitry Andric bool ConvertFromCreated = false; 171fe6060f1SDimitry Andric for (auto *PTrue : PTrues) { 172fe6060f1SDimitry Andric auto *PTrueVTy = cast<VectorType>(PTrue->getType()); 173fe6060f1SDimitry Andric 174fe6060f1SDimitry Andric // Only create the converts if the types are not already the same, otherwise 175fe6060f1SDimitry Andric // just use the most encompassing ptrue. 176fe6060f1SDimitry Andric if (MostEncompassingPTrueVTy != PTrueVTy) { 177fe6060f1SDimitry Andric ConvertFromCreated = true; 178fe6060f1SDimitry Andric 179fe6060f1SDimitry Andric Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator()); 180fe6060f1SDimitry Andric auto *ConvertFromSVBool = 181fe6060f1SDimitry Andric Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, 182fe6060f1SDimitry Andric {PTrueVTy}, {ConvertToSVBool}); 183fe6060f1SDimitry Andric PTrue->replaceAllUsesWith(ConvertFromSVBool); 184fe6060f1SDimitry Andric } else 185fe6060f1SDimitry Andric PTrue->replaceAllUsesWith(MostEncompassingPTrue); 186fe6060f1SDimitry Andric 187fe6060f1SDimitry Andric PTrue->eraseFromParent(); 1885ffd83dbSDimitry Andric } 1895ffd83dbSDimitry Andric 190fe6060f1SDimitry Andric // We never used the ConvertTo so remove it 191fe6060f1SDimitry Andric if (!ConvertFromCreated) 192fe6060f1SDimitry Andric ConvertToSVBool->eraseFromParent(); 1935ffd83dbSDimitry Andric 1945ffd83dbSDimitry Andric return true; 1955ffd83dbSDimitry Andric } 1965ffd83dbSDimitry Andric 197fe6060f1SDimitry Andric /// The goal of this function is to remove redundant calls to the SVE ptrue 198fe6060f1SDimitry Andric /// intrinsic in each basic block within the given functions. 199fe6060f1SDimitry Andric /// 200fe6060f1SDimitry Andric /// SVE ptrues have two representations in LLVM IR: 201fe6060f1SDimitry Andric /// - a logical representation -- an arbitrary-width scalable vector of i1s, 202fe6060f1SDimitry Andric /// i.e. <vscale x N x i1>. 203fe6060f1SDimitry Andric /// - a physical representation (svbool, <vscale x 16 x i1>) -- a 16-element 204fe6060f1SDimitry Andric /// scalable vector of i1s, i.e. <vscale x 16 x i1>. 205fe6060f1SDimitry Andric /// 206fe6060f1SDimitry Andric /// The SVE ptrue intrinsic is used to create a logical representation of an SVE 207fe6060f1SDimitry Andric /// predicate. Suppose that we have two SVE ptrue intrinsic calls: P1 and P2. If 208fe6060f1SDimitry Andric /// P1 creates a logical SVE predicate that is at least as wide as the logical 209fe6060f1SDimitry Andric /// SVE predicate created by P2, then all of the bits that are true in the 210fe6060f1SDimitry Andric /// physical representation of P2 are necessarily also true in the physical 211fe6060f1SDimitry Andric /// representation of P1. P1 'encompasses' P2, therefore, the intrinsic call to 212fe6060f1SDimitry Andric /// P2 is redundant and can be replaced by an SVE reinterpret of P1 via 213fe6060f1SDimitry Andric /// convert.{to,from}.svbool. 214fe6060f1SDimitry Andric /// 215fe6060f1SDimitry Andric /// Currently, this pass only coalesces calls to SVE ptrue intrinsics 216fe6060f1SDimitry Andric /// if they match the following conditions: 217fe6060f1SDimitry Andric /// 218fe6060f1SDimitry Andric /// - the call to the intrinsic uses either the SV_ALL or SV_POW2 patterns. 219fe6060f1SDimitry Andric /// SV_ALL indicates that all bits of the predicate vector are to be set to 220fe6060f1SDimitry Andric /// true. SV_POW2 indicates that all bits of the predicate vector up to the 221fe6060f1SDimitry Andric /// largest power-of-two are to be set to true. 222fe6060f1SDimitry Andric /// - the result of the call to the intrinsic is not promoted to a wider 223fe6060f1SDimitry Andric /// predicate. In this case, keeping the extra ptrue leads to better codegen 224fe6060f1SDimitry Andric /// -- coalescing here would create an irreducible chain of SVE reinterprets 225fe6060f1SDimitry Andric /// via convert.{to,from}.svbool. 226fe6060f1SDimitry Andric /// 227fe6060f1SDimitry Andric /// EXAMPLE: 228fe6060f1SDimitry Andric /// 229fe6060f1SDimitry Andric /// %1 = <vscale x 8 x i1> ptrue(i32 SV_ALL) 230fe6060f1SDimitry Andric /// ; Logical: <1, 1, 1, 1, 1, 1, 1, 1> 231fe6060f1SDimitry Andric /// ; Physical: <1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0> 232fe6060f1SDimitry Andric /// ... 233fe6060f1SDimitry Andric /// 234fe6060f1SDimitry Andric /// %2 = <vscale x 4 x i1> ptrue(i32 SV_ALL) 235fe6060f1SDimitry Andric /// ; Logical: <1, 1, 1, 1> 236fe6060f1SDimitry Andric /// ; Physical: <1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0> 237fe6060f1SDimitry Andric /// ... 238fe6060f1SDimitry Andric /// 239fe6060f1SDimitry Andric /// Here, %2 can be replaced by an SVE reinterpret of %1, giving, for instance: 240fe6060f1SDimitry Andric /// 241fe6060f1SDimitry Andric /// %1 = <vscale x 8 x i1> ptrue(i32 i31) 242fe6060f1SDimitry Andric /// %2 = <vscale x 16 x i1> convert.to.svbool(<vscale x 8 x i1> %1) 243fe6060f1SDimitry Andric /// %3 = <vscale x 4 x i1> convert.from.svbool(<vscale x 16 x i1> %2) 244fe6060f1SDimitry Andric /// 245fe6060f1SDimitry Andric bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls( 246fe6060f1SDimitry Andric SmallSetVector<Function *, 4> &Functions) { 247fe6060f1SDimitry Andric bool Changed = false; 2485ffd83dbSDimitry Andric 249fe6060f1SDimitry Andric for (auto *F : Functions) { 250fe6060f1SDimitry Andric for (auto &BB : *F) { 251fe6060f1SDimitry Andric SmallSetVector<IntrinsicInst *, 4> SVAllPTrues; 252fe6060f1SDimitry Andric SmallSetVector<IntrinsicInst *, 4> SVPow2PTrues; 2535ffd83dbSDimitry Andric 254fe6060f1SDimitry Andric // For each basic block, collect the used ptrues and try to coalesce them. 255fe6060f1SDimitry Andric for (Instruction &I : BB) { 256fe6060f1SDimitry Andric if (I.use_empty()) 257fe6060f1SDimitry Andric continue; 2585ffd83dbSDimitry Andric 259fe6060f1SDimitry Andric auto *IntrI = dyn_cast<IntrinsicInst>(&I); 260fe6060f1SDimitry Andric if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue) 261fe6060f1SDimitry Andric continue; 2625ffd83dbSDimitry Andric 263fe6060f1SDimitry Andric const auto PTruePattern = 264fe6060f1SDimitry Andric cast<ConstantInt>(IntrI->getOperand(0))->getZExtValue(); 2655ffd83dbSDimitry Andric 266fe6060f1SDimitry Andric if (PTruePattern == AArch64SVEPredPattern::all) 267fe6060f1SDimitry Andric SVAllPTrues.insert(IntrI); 268fe6060f1SDimitry Andric if (PTruePattern == AArch64SVEPredPattern::pow2) 269fe6060f1SDimitry Andric SVPow2PTrues.insert(IntrI); 2705ffd83dbSDimitry Andric } 2715ffd83dbSDimitry Andric 272fe6060f1SDimitry Andric Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues); 273fe6060f1SDimitry Andric Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues); 274fe6060f1SDimitry Andric } 2755ffd83dbSDimitry Andric } 2765ffd83dbSDimitry Andric 277fe6060f1SDimitry Andric return Changed; 2785ffd83dbSDimitry Andric } 2795ffd83dbSDimitry Andric 280349cc55cSDimitry Andric // This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce 281349cc55cSDimitry Andric // scalable stores as late as possible 282349cc55cSDimitry Andric bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) { 283349cc55cSDimitry Andric auto *F = I->getFunction(); 284349cc55cSDimitry Andric auto Attr = F->getFnAttribute(Attribute::VScaleRange); 285349cc55cSDimitry Andric if (!Attr.isValid()) 286349cc55cSDimitry Andric return false; 287349cc55cSDimitry Andric 2880eae32dcSDimitry Andric unsigned MinVScale = Attr.getVScaleRangeMin(); 289bdd1243dSDimitry Andric std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax(); 290349cc55cSDimitry Andric // The transform needs to know the exact runtime length of scalable vectors 2910eae32dcSDimitry Andric if (!MaxVScale || MinVScale != MaxVScale) 292349cc55cSDimitry Andric return false; 293349cc55cSDimitry Andric 294349cc55cSDimitry Andric auto *PredType = 295349cc55cSDimitry Andric ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16); 296349cc55cSDimitry Andric auto *FixedPredType = 297349cc55cSDimitry Andric FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2); 298349cc55cSDimitry Andric 299349cc55cSDimitry Andric // If we have a store.. 300349cc55cSDimitry Andric auto *Store = dyn_cast<StoreInst>(I); 301349cc55cSDimitry Andric if (!Store || !Store->isSimple()) 302349cc55cSDimitry Andric return false; 303349cc55cSDimitry Andric 304349cc55cSDimitry Andric // ..that is storing a predicate vector sized worth of bits.. 305349cc55cSDimitry Andric if (Store->getOperand(0)->getType() != FixedPredType) 306349cc55cSDimitry Andric return false; 307349cc55cSDimitry Andric 308349cc55cSDimitry Andric // ..where the value stored comes from a vector extract.. 309349cc55cSDimitry Andric auto *IntrI = dyn_cast<IntrinsicInst>(Store->getOperand(0)); 31081ad6265SDimitry Andric if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract) 311349cc55cSDimitry Andric return false; 312349cc55cSDimitry Andric 313349cc55cSDimitry Andric // ..that is extracting from index 0.. 314349cc55cSDimitry Andric if (!cast<ConstantInt>(IntrI->getOperand(1))->isZero()) 315349cc55cSDimitry Andric return false; 316349cc55cSDimitry Andric 317349cc55cSDimitry Andric // ..where the value being extract from comes from a bitcast 318349cc55cSDimitry Andric auto *BitCast = dyn_cast<BitCastInst>(IntrI->getOperand(0)); 319349cc55cSDimitry Andric if (!BitCast) 320349cc55cSDimitry Andric return false; 321349cc55cSDimitry Andric 322349cc55cSDimitry Andric // ..and the bitcast is casting from predicate type 323349cc55cSDimitry Andric if (BitCast->getOperand(0)->getType() != PredType) 324349cc55cSDimitry Andric return false; 325349cc55cSDimitry Andric 326349cc55cSDimitry Andric IRBuilder<> Builder(I->getContext()); 327349cc55cSDimitry Andric Builder.SetInsertPoint(I); 328349cc55cSDimitry Andric 3295f757f3fSDimitry Andric Builder.CreateStore(BitCast->getOperand(0), Store->getPointerOperand()); 330349cc55cSDimitry Andric 331349cc55cSDimitry Andric Store->eraseFromParent(); 332349cc55cSDimitry Andric if (IntrI->getNumUses() == 0) 333349cc55cSDimitry Andric IntrI->eraseFromParent(); 334349cc55cSDimitry Andric if (BitCast->getNumUses() == 0) 335349cc55cSDimitry Andric BitCast->eraseFromParent(); 336349cc55cSDimitry Andric 337349cc55cSDimitry Andric return true; 338349cc55cSDimitry Andric } 339349cc55cSDimitry Andric 340349cc55cSDimitry Andric // This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce 341349cc55cSDimitry Andric // scalable loads as late as possible 342349cc55cSDimitry Andric bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) { 343349cc55cSDimitry Andric auto *F = I->getFunction(); 344349cc55cSDimitry Andric auto Attr = F->getFnAttribute(Attribute::VScaleRange); 345349cc55cSDimitry Andric if (!Attr.isValid()) 346349cc55cSDimitry Andric return false; 347349cc55cSDimitry Andric 3480eae32dcSDimitry Andric unsigned MinVScale = Attr.getVScaleRangeMin(); 349bdd1243dSDimitry Andric std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax(); 350349cc55cSDimitry Andric // The transform needs to know the exact runtime length of scalable vectors 3510eae32dcSDimitry Andric if (!MaxVScale || MinVScale != MaxVScale) 352349cc55cSDimitry Andric return false; 353349cc55cSDimitry Andric 354349cc55cSDimitry Andric auto *PredType = 355349cc55cSDimitry Andric ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16); 356349cc55cSDimitry Andric auto *FixedPredType = 357349cc55cSDimitry Andric FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2); 358349cc55cSDimitry Andric 359349cc55cSDimitry Andric // If we have a bitcast.. 360349cc55cSDimitry Andric auto *BitCast = dyn_cast<BitCastInst>(I); 361349cc55cSDimitry Andric if (!BitCast || BitCast->getType() != PredType) 362349cc55cSDimitry Andric return false; 363349cc55cSDimitry Andric 364349cc55cSDimitry Andric // ..whose operand is a vector_insert.. 365349cc55cSDimitry Andric auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0)); 36681ad6265SDimitry Andric if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert) 367349cc55cSDimitry Andric return false; 368349cc55cSDimitry Andric 369349cc55cSDimitry Andric // ..that is inserting into index zero of an undef vector.. 370349cc55cSDimitry Andric if (!isa<UndefValue>(IntrI->getOperand(0)) || 371349cc55cSDimitry Andric !cast<ConstantInt>(IntrI->getOperand(2))->isZero()) 372349cc55cSDimitry Andric return false; 373349cc55cSDimitry Andric 374349cc55cSDimitry Andric // ..where the value inserted comes from a load.. 375349cc55cSDimitry Andric auto *Load = dyn_cast<LoadInst>(IntrI->getOperand(1)); 376349cc55cSDimitry Andric if (!Load || !Load->isSimple()) 377349cc55cSDimitry Andric return false; 378349cc55cSDimitry Andric 379349cc55cSDimitry Andric // ..that is loading a predicate vector sized worth of bits.. 380349cc55cSDimitry Andric if (Load->getType() != FixedPredType) 381349cc55cSDimitry Andric return false; 382349cc55cSDimitry Andric 383349cc55cSDimitry Andric IRBuilder<> Builder(I->getContext()); 384349cc55cSDimitry Andric Builder.SetInsertPoint(Load); 385349cc55cSDimitry Andric 3865f757f3fSDimitry Andric auto *LoadPred = Builder.CreateLoad(PredType, Load->getPointerOperand()); 387349cc55cSDimitry Andric 388349cc55cSDimitry Andric BitCast->replaceAllUsesWith(LoadPred); 389349cc55cSDimitry Andric BitCast->eraseFromParent(); 390349cc55cSDimitry Andric if (IntrI->getNumUses() == 0) 391349cc55cSDimitry Andric IntrI->eraseFromParent(); 392349cc55cSDimitry Andric if (Load->getNumUses() == 0) 393349cc55cSDimitry Andric Load->eraseFromParent(); 394349cc55cSDimitry Andric 395349cc55cSDimitry Andric return true; 396349cc55cSDimitry Andric } 397349cc55cSDimitry Andric 398349cc55cSDimitry Andric bool SVEIntrinsicOpts::optimizeInstructions( 399349cc55cSDimitry Andric SmallSetVector<Function *, 4> &Functions) { 400349cc55cSDimitry Andric bool Changed = false; 401349cc55cSDimitry Andric 402349cc55cSDimitry Andric for (auto *F : Functions) { 403349cc55cSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree(); 404349cc55cSDimitry Andric 405349cc55cSDimitry Andric // Traverse the DT with an rpo walk so we see defs before uses, allowing 406349cc55cSDimitry Andric // simplification to be done incrementally. 407349cc55cSDimitry Andric BasicBlock *Root = DT->getRoot(); 408349cc55cSDimitry Andric ReversePostOrderTraversal<BasicBlock *> RPOT(Root); 409349cc55cSDimitry Andric for (auto *BB : RPOT) { 410349cc55cSDimitry Andric for (Instruction &I : make_early_inc_range(*BB)) { 411349cc55cSDimitry Andric switch (I.getOpcode()) { 412349cc55cSDimitry Andric case Instruction::Store: 413349cc55cSDimitry Andric Changed |= optimizePredicateStore(&I); 414349cc55cSDimitry Andric break; 415349cc55cSDimitry Andric case Instruction::BitCast: 416349cc55cSDimitry Andric Changed |= optimizePredicateLoad(&I); 417349cc55cSDimitry Andric break; 418349cc55cSDimitry Andric } 419349cc55cSDimitry Andric } 420349cc55cSDimitry Andric } 421349cc55cSDimitry Andric } 422349cc55cSDimitry Andric 423349cc55cSDimitry Andric return Changed; 424349cc55cSDimitry Andric } 425349cc55cSDimitry Andric 4265ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizeFunctions( 4275ffd83dbSDimitry Andric SmallSetVector<Function *, 4> &Functions) { 4285ffd83dbSDimitry Andric bool Changed = false; 4295ffd83dbSDimitry Andric 430fe6060f1SDimitry Andric Changed |= optimizePTrueIntrinsicCalls(Functions); 431349cc55cSDimitry Andric Changed |= optimizeInstructions(Functions); 432fe6060f1SDimitry Andric 4335ffd83dbSDimitry Andric return Changed; 4345ffd83dbSDimitry Andric } 4355ffd83dbSDimitry Andric 4365ffd83dbSDimitry Andric bool SVEIntrinsicOpts::runOnModule(Module &M) { 4375ffd83dbSDimitry Andric bool Changed = false; 4385ffd83dbSDimitry Andric SmallSetVector<Function *, 4> Functions; 4395ffd83dbSDimitry Andric 4405ffd83dbSDimitry Andric // Check for SVE intrinsic declarations first so that we only iterate over 4415ffd83dbSDimitry Andric // relevant functions. Where an appropriate declaration is found, store the 4425ffd83dbSDimitry Andric // function(s) where it is used so we can target these only. 4435ffd83dbSDimitry Andric for (auto &F : M.getFunctionList()) { 4445ffd83dbSDimitry Andric if (!F.isDeclaration()) 4455ffd83dbSDimitry Andric continue; 4465ffd83dbSDimitry Andric 4475ffd83dbSDimitry Andric switch (F.getIntrinsicID()) { 44881ad6265SDimitry Andric case Intrinsic::vector_extract: 44981ad6265SDimitry Andric case Intrinsic::vector_insert: 450fe6060f1SDimitry Andric case Intrinsic::aarch64_sve_ptrue: 451e8d8bef9SDimitry Andric for (User *U : F.users()) 452e8d8bef9SDimitry Andric Functions.insert(cast<Instruction>(U)->getFunction()); 4535ffd83dbSDimitry Andric break; 4545ffd83dbSDimitry Andric default: 4555ffd83dbSDimitry Andric break; 4565ffd83dbSDimitry Andric } 4575ffd83dbSDimitry Andric } 4585ffd83dbSDimitry Andric 4595ffd83dbSDimitry Andric if (!Functions.empty()) 4605ffd83dbSDimitry Andric Changed |= optimizeFunctions(Functions); 4615ffd83dbSDimitry Andric 4625ffd83dbSDimitry Andric return Changed; 4635ffd83dbSDimitry Andric } 464