//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Performs general IR level optimizations on SVE intrinsics.
//
// The main goal of this pass is to remove unnecessary reinterpret
// intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g.:
//
//   %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
//   %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
//
// This pass also looks for ptest intrinsics & phi instructions where the
// operands are being needlessly converted to and from svbool_t.
205ffd83dbSDimitry Andric // 215ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 225ffd83dbSDimitry Andric 235ffd83dbSDimitry Andric #include "Utils/AArch64BaseInfo.h" 245ffd83dbSDimitry Andric #include "llvm/ADT/PostOrderIterator.h" 255ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h" 265ffd83dbSDimitry Andric #include "llvm/IR/Constants.h" 275ffd83dbSDimitry Andric #include "llvm/IR/Dominators.h" 285ffd83dbSDimitry Andric #include "llvm/IR/IRBuilder.h" 295ffd83dbSDimitry Andric #include "llvm/IR/Instructions.h" 305ffd83dbSDimitry Andric #include "llvm/IR/IntrinsicInst.h" 315ffd83dbSDimitry Andric #include "llvm/IR/IntrinsicsAArch64.h" 325ffd83dbSDimitry Andric #include "llvm/IR/LLVMContext.h" 335ffd83dbSDimitry Andric #include "llvm/IR/PatternMatch.h" 345ffd83dbSDimitry Andric #include "llvm/InitializePasses.h" 355ffd83dbSDimitry Andric #include "llvm/Support/Debug.h" 365ffd83dbSDimitry Andric 375ffd83dbSDimitry Andric using namespace llvm; 385ffd83dbSDimitry Andric using namespace llvm::PatternMatch; 395ffd83dbSDimitry Andric 40*e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-sve-intrinsic-opts" 415ffd83dbSDimitry Andric 425ffd83dbSDimitry Andric namespace llvm { 435ffd83dbSDimitry Andric void initializeSVEIntrinsicOptsPass(PassRegistry &); 445ffd83dbSDimitry Andric } 455ffd83dbSDimitry Andric 465ffd83dbSDimitry Andric namespace { 475ffd83dbSDimitry Andric struct SVEIntrinsicOpts : public ModulePass { 485ffd83dbSDimitry Andric static char ID; // Pass identification, replacement for typeid 495ffd83dbSDimitry Andric SVEIntrinsicOpts() : ModulePass(ID) { 505ffd83dbSDimitry Andric initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry()); 515ffd83dbSDimitry Andric } 525ffd83dbSDimitry Andric 535ffd83dbSDimitry Andric bool runOnModule(Module &M) override; 545ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 555ffd83dbSDimitry Andric 565ffd83dbSDimitry Andric private: 
575ffd83dbSDimitry Andric static IntrinsicInst *isReinterpretToSVBool(Value *V); 585ffd83dbSDimitry Andric 595ffd83dbSDimitry Andric static bool optimizeIntrinsic(Instruction *I); 605ffd83dbSDimitry Andric 615ffd83dbSDimitry Andric bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions); 625ffd83dbSDimitry Andric 635ffd83dbSDimitry Andric static bool optimizeConvertFromSVBool(IntrinsicInst *I); 645ffd83dbSDimitry Andric static bool optimizePTest(IntrinsicInst *I); 655ffd83dbSDimitry Andric 665ffd83dbSDimitry Andric static bool processPhiNode(IntrinsicInst *I); 675ffd83dbSDimitry Andric }; 685ffd83dbSDimitry Andric } // end anonymous namespace 695ffd83dbSDimitry Andric 705ffd83dbSDimitry Andric void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const { 715ffd83dbSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 725ffd83dbSDimitry Andric AU.setPreservesCFG(); 735ffd83dbSDimitry Andric } 745ffd83dbSDimitry Andric 755ffd83dbSDimitry Andric char SVEIntrinsicOpts::ID = 0; 765ffd83dbSDimitry Andric static const char *name = "SVE intrinsics optimizations"; 775ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 785ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 795ffd83dbSDimitry Andric INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 805ffd83dbSDimitry Andric 815ffd83dbSDimitry Andric namespace llvm { 825ffd83dbSDimitry Andric ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); } 835ffd83dbSDimitry Andric } // namespace llvm 845ffd83dbSDimitry Andric 855ffd83dbSDimitry Andric /// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr 865ffd83dbSDimitry Andric /// otherwise. 
875ffd83dbSDimitry Andric IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) { 885ffd83dbSDimitry Andric IntrinsicInst *I = dyn_cast<IntrinsicInst>(V); 895ffd83dbSDimitry Andric if (!I) 905ffd83dbSDimitry Andric return nullptr; 915ffd83dbSDimitry Andric 925ffd83dbSDimitry Andric if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool) 935ffd83dbSDimitry Andric return nullptr; 945ffd83dbSDimitry Andric 955ffd83dbSDimitry Andric return I; 965ffd83dbSDimitry Andric } 975ffd83dbSDimitry Andric 985ffd83dbSDimitry Andric /// The function will remove redundant reinterprets casting in the presence 995ffd83dbSDimitry Andric /// of the control flow 1005ffd83dbSDimitry Andric bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) { 1015ffd83dbSDimitry Andric 1025ffd83dbSDimitry Andric SmallVector<Instruction *, 32> Worklist; 1035ffd83dbSDimitry Andric auto RequiredType = X->getType(); 1045ffd83dbSDimitry Andric 1055ffd83dbSDimitry Andric auto *PN = dyn_cast<PHINode>(X->getArgOperand(0)); 1065ffd83dbSDimitry Andric assert(PN && "Expected Phi Node!"); 1075ffd83dbSDimitry Andric 1085ffd83dbSDimitry Andric // Don't create a new Phi unless we can remove the old one. 
1095ffd83dbSDimitry Andric if (!PN->hasOneUse()) 1105ffd83dbSDimitry Andric return false; 1115ffd83dbSDimitry Andric 1125ffd83dbSDimitry Andric for (Value *IncValPhi : PN->incoming_values()) { 1135ffd83dbSDimitry Andric auto *Reinterpret = isReinterpretToSVBool(IncValPhi); 1145ffd83dbSDimitry Andric if (!Reinterpret || 1155ffd83dbSDimitry Andric RequiredType != Reinterpret->getArgOperand(0)->getType()) 1165ffd83dbSDimitry Andric return false; 1175ffd83dbSDimitry Andric } 1185ffd83dbSDimitry Andric 1195ffd83dbSDimitry Andric // Create the new Phi 1205ffd83dbSDimitry Andric LLVMContext &Ctx = PN->getContext(); 1215ffd83dbSDimitry Andric IRBuilder<> Builder(Ctx); 1225ffd83dbSDimitry Andric Builder.SetInsertPoint(PN); 1235ffd83dbSDimitry Andric PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues()); 1245ffd83dbSDimitry Andric Worklist.push_back(PN); 1255ffd83dbSDimitry Andric 1265ffd83dbSDimitry Andric for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) { 1275ffd83dbSDimitry Andric auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I)); 1285ffd83dbSDimitry Andric NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I)); 1295ffd83dbSDimitry Andric Worklist.push_back(Reinterpret); 1305ffd83dbSDimitry Andric } 1315ffd83dbSDimitry Andric 1325ffd83dbSDimitry Andric // Cleanup Phi Node and reinterprets 1335ffd83dbSDimitry Andric X->replaceAllUsesWith(NPN); 1345ffd83dbSDimitry Andric X->eraseFromParent(); 1355ffd83dbSDimitry Andric 1365ffd83dbSDimitry Andric for (auto &I : Worklist) 1375ffd83dbSDimitry Andric if (I->use_empty()) 1385ffd83dbSDimitry Andric I->eraseFromParent(); 1395ffd83dbSDimitry Andric 1405ffd83dbSDimitry Andric return true; 1415ffd83dbSDimitry Andric } 1425ffd83dbSDimitry Andric 1435ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) { 1445ffd83dbSDimitry Andric IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0)); 1455ffd83dbSDimitry Andric IntrinsicInst *Op2 = 
dyn_cast<IntrinsicInst>(I->getArgOperand(1)); 1465ffd83dbSDimitry Andric 1475ffd83dbSDimitry Andric if (Op1 && Op2 && 1485ffd83dbSDimitry Andric Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 1495ffd83dbSDimitry Andric Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 1505ffd83dbSDimitry Andric Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { 1515ffd83dbSDimitry Andric 1525ffd83dbSDimitry Andric Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; 1535ffd83dbSDimitry Andric Type *Tys[] = {Op1->getArgOperand(0)->getType()}; 1545ffd83dbSDimitry Andric Module *M = I->getParent()->getParent()->getParent(); 1555ffd83dbSDimitry Andric 1565ffd83dbSDimitry Andric auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys); 1575ffd83dbSDimitry Andric auto CI = CallInst::Create(Fn, Ops, I->getName(), I); 1585ffd83dbSDimitry Andric 1595ffd83dbSDimitry Andric I->replaceAllUsesWith(CI); 1605ffd83dbSDimitry Andric I->eraseFromParent(); 1615ffd83dbSDimitry Andric if (Op1->use_empty()) 1625ffd83dbSDimitry Andric Op1->eraseFromParent(); 16375b4d546SDimitry Andric if (Op1 != Op2 && Op2->use_empty()) 1645ffd83dbSDimitry Andric Op2->eraseFromParent(); 1655ffd83dbSDimitry Andric 1665ffd83dbSDimitry Andric return true; 1675ffd83dbSDimitry Andric } 1685ffd83dbSDimitry Andric 1695ffd83dbSDimitry Andric return false; 1705ffd83dbSDimitry Andric } 1715ffd83dbSDimitry Andric 1725ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { 1735ffd83dbSDimitry Andric assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool && 1745ffd83dbSDimitry Andric "Unexpected opcode"); 1755ffd83dbSDimitry Andric 1765ffd83dbSDimitry Andric // If the reinterpret instruction operand is a PHI Node 1775ffd83dbSDimitry Andric if (isa<PHINode>(I->getArgOperand(0))) 1785ffd83dbSDimitry Andric return processPhiNode(I); 1795ffd83dbSDimitry Andric 180*e8d8bef9SDimitry Andric 
SmallVector<Instruction *, 32> CandidatesForRemoval; 181*e8d8bef9SDimitry Andric Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr; 182*e8d8bef9SDimitry Andric 183*e8d8bef9SDimitry Andric const auto *IVTy = cast<VectorType>(I->getType()); 184*e8d8bef9SDimitry Andric 185*e8d8bef9SDimitry Andric // Walk the chain of conversions. 186*e8d8bef9SDimitry Andric while (Cursor) { 187*e8d8bef9SDimitry Andric // If the type of the cursor has fewer lanes than the final result, zeroing 188*e8d8bef9SDimitry Andric // must take place, which breaks the equivalence chain. 189*e8d8bef9SDimitry Andric const auto *CursorVTy = cast<VectorType>(Cursor->getType()); 190*e8d8bef9SDimitry Andric if (CursorVTy->getElementCount().getKnownMinValue() < 191*e8d8bef9SDimitry Andric IVTy->getElementCount().getKnownMinValue()) 192*e8d8bef9SDimitry Andric break; 193*e8d8bef9SDimitry Andric 194*e8d8bef9SDimitry Andric // If the cursor has the same type as I, it is a viable replacement. 195*e8d8bef9SDimitry Andric if (Cursor->getType() == IVTy) 196*e8d8bef9SDimitry Andric EarliestReplacement = Cursor; 197*e8d8bef9SDimitry Andric 198*e8d8bef9SDimitry Andric auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor); 199*e8d8bef9SDimitry Andric 200*e8d8bef9SDimitry Andric // If this is not an SVE conversion intrinsic, this is the end of the chain. 
201*e8d8bef9SDimitry Andric if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() == 202*e8d8bef9SDimitry Andric Intrinsic::aarch64_sve_convert_to_svbool || 203*e8d8bef9SDimitry Andric IntrinsicCursor->getIntrinsicID() == 204*e8d8bef9SDimitry Andric Intrinsic::aarch64_sve_convert_from_svbool)) 205*e8d8bef9SDimitry Andric break; 206*e8d8bef9SDimitry Andric 207*e8d8bef9SDimitry Andric CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor); 208*e8d8bef9SDimitry Andric Cursor = IntrinsicCursor->getOperand(0); 209*e8d8bef9SDimitry Andric } 210*e8d8bef9SDimitry Andric 211*e8d8bef9SDimitry Andric // If no viable replacement in the conversion chain was found, there is 212*e8d8bef9SDimitry Andric // nothing to do. 213*e8d8bef9SDimitry Andric if (!EarliestReplacement) 2145ffd83dbSDimitry Andric return false; 2155ffd83dbSDimitry Andric 216*e8d8bef9SDimitry Andric I->replaceAllUsesWith(EarliestReplacement); 2175ffd83dbSDimitry Andric I->eraseFromParent(); 2185ffd83dbSDimitry Andric 219*e8d8bef9SDimitry Andric while (!CandidatesForRemoval.empty()) { 220*e8d8bef9SDimitry Andric Instruction *Candidate = CandidatesForRemoval.pop_back_val(); 221*e8d8bef9SDimitry Andric if (Candidate->use_empty()) 222*e8d8bef9SDimitry Andric Candidate->eraseFromParent(); 223*e8d8bef9SDimitry Andric } 2245ffd83dbSDimitry Andric return true; 2255ffd83dbSDimitry Andric } 2265ffd83dbSDimitry Andric 2275ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) { 2285ffd83dbSDimitry Andric IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I); 2295ffd83dbSDimitry Andric if (!IntrI) 2305ffd83dbSDimitry Andric return false; 2315ffd83dbSDimitry Andric 2325ffd83dbSDimitry Andric switch (IntrI->getIntrinsicID()) { 2335ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_convert_from_svbool: 2345ffd83dbSDimitry Andric return optimizeConvertFromSVBool(IntrI); 2355ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_any: 2365ffd83dbSDimitry Andric case 
Intrinsic::aarch64_sve_ptest_first: 2375ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_last: 2385ffd83dbSDimitry Andric return optimizePTest(IntrI); 2395ffd83dbSDimitry Andric default: 2405ffd83dbSDimitry Andric return false; 2415ffd83dbSDimitry Andric } 2425ffd83dbSDimitry Andric 2435ffd83dbSDimitry Andric return true; 2445ffd83dbSDimitry Andric } 2455ffd83dbSDimitry Andric 2465ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizeFunctions( 2475ffd83dbSDimitry Andric SmallSetVector<Function *, 4> &Functions) { 2485ffd83dbSDimitry Andric bool Changed = false; 2495ffd83dbSDimitry Andric for (auto *F : Functions) { 2505ffd83dbSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree(); 2515ffd83dbSDimitry Andric 2525ffd83dbSDimitry Andric // Traverse the DT with an rpo walk so we see defs before uses, allowing 2535ffd83dbSDimitry Andric // simplification to be done incrementally. 2545ffd83dbSDimitry Andric BasicBlock *Root = DT->getRoot(); 2555ffd83dbSDimitry Andric ReversePostOrderTraversal<BasicBlock *> RPOT(Root); 2565ffd83dbSDimitry Andric for (auto *BB : RPOT) 2575ffd83dbSDimitry Andric for (Instruction &I : make_early_inc_range(*BB)) 2585ffd83dbSDimitry Andric Changed |= optimizeIntrinsic(&I); 2595ffd83dbSDimitry Andric } 2605ffd83dbSDimitry Andric return Changed; 2615ffd83dbSDimitry Andric } 2625ffd83dbSDimitry Andric 2635ffd83dbSDimitry Andric bool SVEIntrinsicOpts::runOnModule(Module &M) { 2645ffd83dbSDimitry Andric bool Changed = false; 2655ffd83dbSDimitry Andric SmallSetVector<Function *, 4> Functions; 2665ffd83dbSDimitry Andric 2675ffd83dbSDimitry Andric // Check for SVE intrinsic declarations first so that we only iterate over 2685ffd83dbSDimitry Andric // relevant functions. Where an appropriate declaration is found, store the 2695ffd83dbSDimitry Andric // function(s) where it is used so we can target these only. 
2705ffd83dbSDimitry Andric for (auto &F : M.getFunctionList()) { 2715ffd83dbSDimitry Andric if (!F.isDeclaration()) 2725ffd83dbSDimitry Andric continue; 2735ffd83dbSDimitry Andric 2745ffd83dbSDimitry Andric switch (F.getIntrinsicID()) { 2755ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_convert_from_svbool: 2765ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_any: 2775ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_first: 2785ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_last: 279*e8d8bef9SDimitry Andric for (User *U : F.users()) 280*e8d8bef9SDimitry Andric Functions.insert(cast<Instruction>(U)->getFunction()); 2815ffd83dbSDimitry Andric break; 2825ffd83dbSDimitry Andric default: 2835ffd83dbSDimitry Andric break; 2845ffd83dbSDimitry Andric } 2855ffd83dbSDimitry Andric } 2865ffd83dbSDimitry Andric 2875ffd83dbSDimitry Andric if (!Functions.empty()) 2885ffd83dbSDimitry Andric Changed |= optimizeFunctions(Functions); 2895ffd83dbSDimitry Andric 2905ffd83dbSDimitry Andric return Changed; 2915ffd83dbSDimitry Andric } 292