//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"

namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
// the upper 32 bits with ones.
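// For example (an illustrative sketch with placeholder IR names, not taken
// from an existing test), a candidate looks like
//
//   %e = zext nneg i32 %x to i64
//   %a = and i64 %e, 4294965248    ; 0xFFFFF800, not representable as simm12
//
// and is rewritten to
//
//   %a = and i64 %e, -2048         ; simm12, so it can be selected as ANDI
//
// Per the rationale above, this is valid because the nneg flag tells us bit 31
// of %x is zero, so setting bits 63:32 of the mask cannot change the result.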
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow use of ANDI.
  // TODO: Is it worth making simm32?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

  // Sign extend the constant and replace the And operand.
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V
// vector reduction instructions write the result in the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorisation in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                    <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                    <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// Which eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero strided loads so we match more .vx splat patterns, even
// if we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will
// convert it back to a strided load if it's optimized.
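// For example (an illustrative sketch with placeholder IR names and the
// intrinsic type mangling omitted), a zero-strided load such as
//
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load(
//            ptr %p, i64 0, <vscale x 2 x i1> splat (i1 true), i32 %evl)
//
// is expanded, provided %evl is known to be non-zero, into
//
//   %s = load i32, ptr %p
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.splat(
//            i32 %s, <vscale x 2 x i1> splat (i1 true), i32 %evl)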
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW>XLEN then a splat will get lowered as a zero strided load anyway,
  // so avoid expanding here.
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);
  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
                                       {Val, II.getOperand(2), VL});

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}