//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Performs general IR level optimizations on SVE intrinsics.
//
// The main goal of this pass is to remove unnecessary reinterpret
// intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g:
//
//   %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
//   %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
//
// This pass also looks for ptest intrinsics & phi instructions where the
// operands are being needlessly converted to and from svbool_t.
20*5ffd83dbSDimitry Andric // 21*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 22*5ffd83dbSDimitry Andric 23*5ffd83dbSDimitry Andric #include "Utils/AArch64BaseInfo.h" 24*5ffd83dbSDimitry Andric #include "llvm/ADT/PostOrderIterator.h" 25*5ffd83dbSDimitry Andric #include "llvm/ADT/SetVector.h" 26*5ffd83dbSDimitry Andric #include "llvm/IR/Constants.h" 27*5ffd83dbSDimitry Andric #include "llvm/IR/Dominators.h" 28*5ffd83dbSDimitry Andric #include "llvm/IR/IRBuilder.h" 29*5ffd83dbSDimitry Andric #include "llvm/IR/Instructions.h" 30*5ffd83dbSDimitry Andric #include "llvm/IR/IntrinsicInst.h" 31*5ffd83dbSDimitry Andric #include "llvm/IR/IntrinsicsAArch64.h" 32*5ffd83dbSDimitry Andric #include "llvm/IR/LLVMContext.h" 33*5ffd83dbSDimitry Andric #include "llvm/IR/PatternMatch.h" 34*5ffd83dbSDimitry Andric #include "llvm/InitializePasses.h" 35*5ffd83dbSDimitry Andric #include "llvm/Support/Debug.h" 36*5ffd83dbSDimitry Andric 37*5ffd83dbSDimitry Andric using namespace llvm; 38*5ffd83dbSDimitry Andric using namespace llvm::PatternMatch; 39*5ffd83dbSDimitry Andric 40*5ffd83dbSDimitry Andric #define DEBUG_TYPE "sve-intrinsic-opts" 41*5ffd83dbSDimitry Andric 42*5ffd83dbSDimitry Andric namespace llvm { 43*5ffd83dbSDimitry Andric void initializeSVEIntrinsicOptsPass(PassRegistry &); 44*5ffd83dbSDimitry Andric } 45*5ffd83dbSDimitry Andric 46*5ffd83dbSDimitry Andric namespace { 47*5ffd83dbSDimitry Andric struct SVEIntrinsicOpts : public ModulePass { 48*5ffd83dbSDimitry Andric static char ID; // Pass identification, replacement for typeid 49*5ffd83dbSDimitry Andric SVEIntrinsicOpts() : ModulePass(ID) { 50*5ffd83dbSDimitry Andric initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry()); 51*5ffd83dbSDimitry Andric } 52*5ffd83dbSDimitry Andric 53*5ffd83dbSDimitry Andric bool runOnModule(Module &M) override; 54*5ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 55*5ffd83dbSDimitry Andric 
56*5ffd83dbSDimitry Andric private: 57*5ffd83dbSDimitry Andric static IntrinsicInst *isReinterpretToSVBool(Value *V); 58*5ffd83dbSDimitry Andric 59*5ffd83dbSDimitry Andric static bool optimizeIntrinsic(Instruction *I); 60*5ffd83dbSDimitry Andric 61*5ffd83dbSDimitry Andric bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions); 62*5ffd83dbSDimitry Andric 63*5ffd83dbSDimitry Andric static bool optimizeConvertFromSVBool(IntrinsicInst *I); 64*5ffd83dbSDimitry Andric static bool optimizePTest(IntrinsicInst *I); 65*5ffd83dbSDimitry Andric 66*5ffd83dbSDimitry Andric static bool processPhiNode(IntrinsicInst *I); 67*5ffd83dbSDimitry Andric }; 68*5ffd83dbSDimitry Andric } // end anonymous namespace 69*5ffd83dbSDimitry Andric 70*5ffd83dbSDimitry Andric void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const { 71*5ffd83dbSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 72*5ffd83dbSDimitry Andric AU.setPreservesCFG(); 73*5ffd83dbSDimitry Andric } 74*5ffd83dbSDimitry Andric 75*5ffd83dbSDimitry Andric char SVEIntrinsicOpts::ID = 0; 76*5ffd83dbSDimitry Andric static const char *name = "SVE intrinsics optimizations"; 77*5ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 78*5ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 79*5ffd83dbSDimitry Andric INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 80*5ffd83dbSDimitry Andric 81*5ffd83dbSDimitry Andric namespace llvm { 82*5ffd83dbSDimitry Andric ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); } 83*5ffd83dbSDimitry Andric } // namespace llvm 84*5ffd83dbSDimitry Andric 85*5ffd83dbSDimitry Andric /// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr 86*5ffd83dbSDimitry Andric /// otherwise. 
87*5ffd83dbSDimitry Andric IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) { 88*5ffd83dbSDimitry Andric IntrinsicInst *I = dyn_cast<IntrinsicInst>(V); 89*5ffd83dbSDimitry Andric if (!I) 90*5ffd83dbSDimitry Andric return nullptr; 91*5ffd83dbSDimitry Andric 92*5ffd83dbSDimitry Andric if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool) 93*5ffd83dbSDimitry Andric return nullptr; 94*5ffd83dbSDimitry Andric 95*5ffd83dbSDimitry Andric return I; 96*5ffd83dbSDimitry Andric } 97*5ffd83dbSDimitry Andric 98*5ffd83dbSDimitry Andric /// The function will remove redundant reinterprets casting in the presence 99*5ffd83dbSDimitry Andric /// of the control flow 100*5ffd83dbSDimitry Andric bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) { 101*5ffd83dbSDimitry Andric 102*5ffd83dbSDimitry Andric SmallVector<Instruction *, 32> Worklist; 103*5ffd83dbSDimitry Andric auto RequiredType = X->getType(); 104*5ffd83dbSDimitry Andric 105*5ffd83dbSDimitry Andric auto *PN = dyn_cast<PHINode>(X->getArgOperand(0)); 106*5ffd83dbSDimitry Andric assert(PN && "Expected Phi Node!"); 107*5ffd83dbSDimitry Andric 108*5ffd83dbSDimitry Andric // Don't create a new Phi unless we can remove the old one. 
109*5ffd83dbSDimitry Andric if (!PN->hasOneUse()) 110*5ffd83dbSDimitry Andric return false; 111*5ffd83dbSDimitry Andric 112*5ffd83dbSDimitry Andric for (Value *IncValPhi : PN->incoming_values()) { 113*5ffd83dbSDimitry Andric auto *Reinterpret = isReinterpretToSVBool(IncValPhi); 114*5ffd83dbSDimitry Andric if (!Reinterpret || 115*5ffd83dbSDimitry Andric RequiredType != Reinterpret->getArgOperand(0)->getType()) 116*5ffd83dbSDimitry Andric return false; 117*5ffd83dbSDimitry Andric } 118*5ffd83dbSDimitry Andric 119*5ffd83dbSDimitry Andric // Create the new Phi 120*5ffd83dbSDimitry Andric LLVMContext &Ctx = PN->getContext(); 121*5ffd83dbSDimitry Andric IRBuilder<> Builder(Ctx); 122*5ffd83dbSDimitry Andric Builder.SetInsertPoint(PN); 123*5ffd83dbSDimitry Andric PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues()); 124*5ffd83dbSDimitry Andric Worklist.push_back(PN); 125*5ffd83dbSDimitry Andric 126*5ffd83dbSDimitry Andric for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) { 127*5ffd83dbSDimitry Andric auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I)); 128*5ffd83dbSDimitry Andric NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I)); 129*5ffd83dbSDimitry Andric Worklist.push_back(Reinterpret); 130*5ffd83dbSDimitry Andric } 131*5ffd83dbSDimitry Andric 132*5ffd83dbSDimitry Andric // Cleanup Phi Node and reinterprets 133*5ffd83dbSDimitry Andric X->replaceAllUsesWith(NPN); 134*5ffd83dbSDimitry Andric X->eraseFromParent(); 135*5ffd83dbSDimitry Andric 136*5ffd83dbSDimitry Andric for (auto &I : Worklist) 137*5ffd83dbSDimitry Andric if (I->use_empty()) 138*5ffd83dbSDimitry Andric I->eraseFromParent(); 139*5ffd83dbSDimitry Andric 140*5ffd83dbSDimitry Andric return true; 141*5ffd83dbSDimitry Andric } 142*5ffd83dbSDimitry Andric 143*5ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) { 144*5ffd83dbSDimitry Andric IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0)); 145*5ffd83dbSDimitry 
Andric IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1)); 146*5ffd83dbSDimitry Andric 147*5ffd83dbSDimitry Andric if (Op1 && Op2 && 148*5ffd83dbSDimitry Andric Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 149*5ffd83dbSDimitry Andric Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 150*5ffd83dbSDimitry Andric Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { 151*5ffd83dbSDimitry Andric 152*5ffd83dbSDimitry Andric Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; 153*5ffd83dbSDimitry Andric Type *Tys[] = {Op1->getArgOperand(0)->getType()}; 154*5ffd83dbSDimitry Andric Module *M = I->getParent()->getParent()->getParent(); 155*5ffd83dbSDimitry Andric 156*5ffd83dbSDimitry Andric auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys); 157*5ffd83dbSDimitry Andric auto CI = CallInst::Create(Fn, Ops, I->getName(), I); 158*5ffd83dbSDimitry Andric 159*5ffd83dbSDimitry Andric I->replaceAllUsesWith(CI); 160*5ffd83dbSDimitry Andric I->eraseFromParent(); 161*5ffd83dbSDimitry Andric if (Op1->use_empty()) 162*5ffd83dbSDimitry Andric Op1->eraseFromParent(); 163*5ffd83dbSDimitry Andric if (Op2->use_empty()) 164*5ffd83dbSDimitry Andric Op2->eraseFromParent(); 165*5ffd83dbSDimitry Andric 166*5ffd83dbSDimitry Andric return true; 167*5ffd83dbSDimitry Andric } 168*5ffd83dbSDimitry Andric 169*5ffd83dbSDimitry Andric return false; 170*5ffd83dbSDimitry Andric } 171*5ffd83dbSDimitry Andric 172*5ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { 173*5ffd83dbSDimitry Andric assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool && 174*5ffd83dbSDimitry Andric "Unexpected opcode"); 175*5ffd83dbSDimitry Andric 176*5ffd83dbSDimitry Andric // If the reinterpret instruction operand is a PHI Node 177*5ffd83dbSDimitry Andric if (isa<PHINode>(I->getArgOperand(0))) 178*5ffd83dbSDimitry Andric return processPhiNode(I); 179*5ffd83dbSDimitry 
Andric 180*5ffd83dbSDimitry Andric // If we have a reinterpret intrinsic I of type A which is converting from 181*5ffd83dbSDimitry Andric // another reinterpret Y of type B, and the source type of Y is A, then we can 182*5ffd83dbSDimitry Andric // elide away both reinterprets if there are no other users of Y. 183*5ffd83dbSDimitry Andric auto *Y = isReinterpretToSVBool(I->getArgOperand(0)); 184*5ffd83dbSDimitry Andric if (!Y) 185*5ffd83dbSDimitry Andric return false; 186*5ffd83dbSDimitry Andric 187*5ffd83dbSDimitry Andric Value *SourceVal = Y->getArgOperand(0); 188*5ffd83dbSDimitry Andric if (I->getType() != SourceVal->getType()) 189*5ffd83dbSDimitry Andric return false; 190*5ffd83dbSDimitry Andric 191*5ffd83dbSDimitry Andric I->replaceAllUsesWith(SourceVal); 192*5ffd83dbSDimitry Andric I->eraseFromParent(); 193*5ffd83dbSDimitry Andric if (Y->use_empty()) 194*5ffd83dbSDimitry Andric Y->eraseFromParent(); 195*5ffd83dbSDimitry Andric 196*5ffd83dbSDimitry Andric return true; 197*5ffd83dbSDimitry Andric } 198*5ffd83dbSDimitry Andric 199*5ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) { 200*5ffd83dbSDimitry Andric IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I); 201*5ffd83dbSDimitry Andric if (!IntrI) 202*5ffd83dbSDimitry Andric return false; 203*5ffd83dbSDimitry Andric 204*5ffd83dbSDimitry Andric switch (IntrI->getIntrinsicID()) { 205*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_convert_from_svbool: 206*5ffd83dbSDimitry Andric return optimizeConvertFromSVBool(IntrI); 207*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_any: 208*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_first: 209*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_last: 210*5ffd83dbSDimitry Andric return optimizePTest(IntrI); 211*5ffd83dbSDimitry Andric default: 212*5ffd83dbSDimitry Andric return false; 213*5ffd83dbSDimitry Andric } 214*5ffd83dbSDimitry Andric 215*5ffd83dbSDimitry Andric return true; 216*5ffd83dbSDimitry Andric } 
217*5ffd83dbSDimitry Andric 218*5ffd83dbSDimitry Andric bool SVEIntrinsicOpts::optimizeFunctions( 219*5ffd83dbSDimitry Andric SmallSetVector<Function *, 4> &Functions) { 220*5ffd83dbSDimitry Andric bool Changed = false; 221*5ffd83dbSDimitry Andric for (auto *F : Functions) { 222*5ffd83dbSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree(); 223*5ffd83dbSDimitry Andric 224*5ffd83dbSDimitry Andric // Traverse the DT with an rpo walk so we see defs before uses, allowing 225*5ffd83dbSDimitry Andric // simplification to be done incrementally. 226*5ffd83dbSDimitry Andric BasicBlock *Root = DT->getRoot(); 227*5ffd83dbSDimitry Andric ReversePostOrderTraversal<BasicBlock *> RPOT(Root); 228*5ffd83dbSDimitry Andric for (auto *BB : RPOT) 229*5ffd83dbSDimitry Andric for (Instruction &I : make_early_inc_range(*BB)) 230*5ffd83dbSDimitry Andric Changed |= optimizeIntrinsic(&I); 231*5ffd83dbSDimitry Andric } 232*5ffd83dbSDimitry Andric return Changed; 233*5ffd83dbSDimitry Andric } 234*5ffd83dbSDimitry Andric 235*5ffd83dbSDimitry Andric bool SVEIntrinsicOpts::runOnModule(Module &M) { 236*5ffd83dbSDimitry Andric bool Changed = false; 237*5ffd83dbSDimitry Andric SmallSetVector<Function *, 4> Functions; 238*5ffd83dbSDimitry Andric 239*5ffd83dbSDimitry Andric // Check for SVE intrinsic declarations first so that we only iterate over 240*5ffd83dbSDimitry Andric // relevant functions. Where an appropriate declaration is found, store the 241*5ffd83dbSDimitry Andric // function(s) where it is used so we can target these only. 
242*5ffd83dbSDimitry Andric for (auto &F : M.getFunctionList()) { 243*5ffd83dbSDimitry Andric if (!F.isDeclaration()) 244*5ffd83dbSDimitry Andric continue; 245*5ffd83dbSDimitry Andric 246*5ffd83dbSDimitry Andric switch (F.getIntrinsicID()) { 247*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_convert_from_svbool: 248*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_any: 249*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_first: 250*5ffd83dbSDimitry Andric case Intrinsic::aarch64_sve_ptest_last: 251*5ffd83dbSDimitry Andric for (auto I = F.user_begin(), E = F.user_end(); I != E;) { 252*5ffd83dbSDimitry Andric auto *Inst = dyn_cast<Instruction>(*I++); 253*5ffd83dbSDimitry Andric Functions.insert(Inst->getFunction()); 254*5ffd83dbSDimitry Andric } 255*5ffd83dbSDimitry Andric break; 256*5ffd83dbSDimitry Andric default: 257*5ffd83dbSDimitry Andric break; 258*5ffd83dbSDimitry Andric } 259*5ffd83dbSDimitry Andric } 260*5ffd83dbSDimitry Andric 261*5ffd83dbSDimitry Andric if (!Functions.empty()) 262*5ffd83dbSDimitry Andric Changed |= optimizeFunctions(Functions); 263*5ffd83dbSDimitry Andric 264*5ffd83dbSDimitry Andric return Changed; 265*5ffd83dbSDimitry Andric } 266