1fcaf7f86SDimitry Andric //===----- RISCVCodeGenPrepare.cpp ----------------------------------------===// 2fcaf7f86SDimitry Andric // 3fcaf7f86SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fcaf7f86SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fcaf7f86SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fcaf7f86SDimitry Andric // 7fcaf7f86SDimitry Andric //===----------------------------------------------------------------------===// 8fcaf7f86SDimitry Andric // 906c3fb27SDimitry Andric // This is a RISC-V specific version of CodeGenPrepare. 10fcaf7f86SDimitry Andric // It munges the code in the input function to better prepare it for 11fcaf7f86SDimitry Andric // SelectionDAG-based code generation. This works around limitations in it's 12fcaf7f86SDimitry Andric // basic-block-at-a-time approach. 13fcaf7f86SDimitry Andric // 14fcaf7f86SDimitry Andric //===----------------------------------------------------------------------===// 15fcaf7f86SDimitry Andric 16fcaf7f86SDimitry Andric #include "RISCV.h" 17fcaf7f86SDimitry Andric #include "RISCVTargetMachine.h" 18fcaf7f86SDimitry Andric #include "llvm/ADT/Statistic.h" 19fcaf7f86SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 20fcaf7f86SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 210fca6ea1SDimitry Andric #include "llvm/IR/Dominators.h" 227a6dacacSDimitry Andric #include "llvm/IR/IRBuilder.h" 23bdd1243dSDimitry Andric #include "llvm/IR/InstVisitor.h" 247a6dacacSDimitry Andric #include "llvm/IR/Intrinsics.h" 250fca6ea1SDimitry Andric #include "llvm/IR/IntrinsicsRISCV.h" 26972a253aSDimitry Andric #include "llvm/IR/PatternMatch.h" 27fcaf7f86SDimitry Andric #include "llvm/InitializePasses.h" 28fcaf7f86SDimitry Andric #include "llvm/Pass.h" 29fcaf7f86SDimitry Andric 30fcaf7f86SDimitry Andric using namespace llvm; 31fcaf7f86SDimitry Andric 32fcaf7f86SDimitry Andric #define DEBUG_TYPE 
"riscv-codegenprepare" 3306c3fb27SDimitry Andric #define PASS_NAME "RISC-V CodeGenPrepare" 34fcaf7f86SDimitry Andric 35fcaf7f86SDimitry Andric namespace { 36fcaf7f86SDimitry Andric 37bdd1243dSDimitry Andric class RISCVCodeGenPrepare : public FunctionPass, 38bdd1243dSDimitry Andric public InstVisitor<RISCVCodeGenPrepare, bool> { 39fcaf7f86SDimitry Andric const DataLayout *DL; 400fca6ea1SDimitry Andric const DominatorTree *DT; 41fcaf7f86SDimitry Andric const RISCVSubtarget *ST; 42fcaf7f86SDimitry Andric 43fcaf7f86SDimitry Andric public: 44fcaf7f86SDimitry Andric static char ID; 45fcaf7f86SDimitry Andric 46fcaf7f86SDimitry Andric RISCVCodeGenPrepare() : FunctionPass(ID) {} 47fcaf7f86SDimitry Andric 48fcaf7f86SDimitry Andric bool runOnFunction(Function &F) override; 49fcaf7f86SDimitry Andric 50fcaf7f86SDimitry Andric StringRef getPassName() const override { return PASS_NAME; } 51fcaf7f86SDimitry Andric 52fcaf7f86SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 53fcaf7f86SDimitry Andric AU.setPreservesCFG(); 540fca6ea1SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 55fcaf7f86SDimitry Andric AU.addRequired<TargetPassConfig>(); 56fcaf7f86SDimitry Andric } 57fcaf7f86SDimitry Andric 58bdd1243dSDimitry Andric bool visitInstruction(Instruction &I) { return false; } 59bdd1243dSDimitry Andric bool visitAnd(BinaryOperator &BO); 607a6dacacSDimitry Andric bool visitIntrinsicInst(IntrinsicInst &I); 610fca6ea1SDimitry Andric bool expandVPStrideLoad(IntrinsicInst &I); 62fcaf7f86SDimitry Andric }; 63fcaf7f86SDimitry Andric 64fcaf7f86SDimitry Andric } // end anonymous namespace 65fcaf7f86SDimitry Andric 66fcaf7f86SDimitry Andric // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set, 675f757f3fSDimitry Andric // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill 685f757f3fSDimitry Andric // the upper 32 bits with ones. 
69bdd1243dSDimitry Andric bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { 70fcaf7f86SDimitry Andric if (!ST->is64Bit()) 71fcaf7f86SDimitry Andric return false; 72fcaf7f86SDimitry Andric 73bdd1243dSDimitry Andric if (!BO.getType()->isIntegerTy(64)) 74fcaf7f86SDimitry Andric return false; 75fcaf7f86SDimitry Andric 767a6dacacSDimitry Andric using namespace PatternMatch; 775f757f3fSDimitry Andric 787a6dacacSDimitry Andric // Left hand side should be a zext nneg. 797a6dacacSDimitry Andric Value *LHSSrc; 807a6dacacSDimitry Andric if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc)))) 81fcaf7f86SDimitry Andric return false; 82fcaf7f86SDimitry Andric 83fcaf7f86SDimitry Andric if (!LHSSrc->getType()->isIntegerTy(32)) 84fcaf7f86SDimitry Andric return false; 85fcaf7f86SDimitry Andric 86fcaf7f86SDimitry Andric // Right hand side should be a constant. 87bdd1243dSDimitry Andric Value *RHS = BO.getOperand(1); 88fcaf7f86SDimitry Andric 89fcaf7f86SDimitry Andric auto *CI = dyn_cast<ConstantInt>(RHS); 90fcaf7f86SDimitry Andric if (!CI) 91fcaf7f86SDimitry Andric return false; 92fcaf7f86SDimitry Andric uint64_t C = CI->getZExtValue(); 93fcaf7f86SDimitry Andric 94fcaf7f86SDimitry Andric // Look for constants that fit in 32 bits but not simm12, and can be made 95fcaf7f86SDimitry Andric // into simm12 by sign extending bit 31. This will allow use of ANDI. 96fcaf7f86SDimitry Andric // TODO: Is worth making simm32? 97fcaf7f86SDimitry Andric if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C))) 98fcaf7f86SDimitry Andric return false; 99fcaf7f86SDimitry Andric 100fcaf7f86SDimitry Andric // Sign extend the constant and replace the And operand. 
101fcaf7f86SDimitry Andric C = SignExtend64<32>(C); 1027a6dacacSDimitry Andric BO.setOperand(1, ConstantInt::get(RHS->getType(), C)); 1037a6dacacSDimitry Andric 1047a6dacacSDimitry Andric return true; 1057a6dacacSDimitry Andric } 1067a6dacacSDimitry Andric 1077a6dacacSDimitry Andric // LLVM vector reduction intrinsics return a scalar result, but on RISC-V vector 1087a6dacacSDimitry Andric // reduction instructions write the result in the first element of a vector 1097a6dacacSDimitry Andric // register. So when a reduction in a loop uses a scalar phi, we end up with 1107a6dacacSDimitry Andric // unnecessary scalar moves: 1117a6dacacSDimitry Andric // 1127a6dacacSDimitry Andric // loop: 1137a6dacacSDimitry Andric // vfmv.s.f v10, fa0 1147a6dacacSDimitry Andric // vfredosum.vs v8, v8, v10 1157a6dacacSDimitry Andric // vfmv.f.s fa0, v8 1167a6dacacSDimitry Andric // 1177a6dacacSDimitry Andric // This mainly affects ordered fadd reductions, since other types of reduction 1187a6dacacSDimitry Andric // typically use element-wise vectorisation in the loop body. 
// This tries to vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
//   %phi = phi float [ ..., %entry ], [ %acc, %loop ]
//   %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                      <vscale x 2 x float> %vec)
//
// ->
//
// loop:
//   %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
//   %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
//   %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                      <vscale x 2 x float> %vec)
//   %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// Which eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
1377a6dacacSDimitry Andric bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) { 1380fca6ea1SDimitry Andric if (expandVPStrideLoad(I)) 1390fca6ea1SDimitry Andric return true; 1400fca6ea1SDimitry Andric 1417a6dacacSDimitry Andric if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd) 1427a6dacacSDimitry Andric return false; 1437a6dacacSDimitry Andric 1447a6dacacSDimitry Andric auto *PHI = dyn_cast<PHINode>(I.getOperand(0)); 1457a6dacacSDimitry Andric if (!PHI || !PHI->hasOneUse() || 1467a6dacacSDimitry Andric !llvm::is_contained(PHI->incoming_values(), &I)) 1477a6dacacSDimitry Andric return false; 1487a6dacacSDimitry Andric 1497a6dacacSDimitry Andric Type *VecTy = I.getOperand(1)->getType(); 1507a6dacacSDimitry Andric IRBuilder<> Builder(PHI); 1517a6dacacSDimitry Andric auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues()); 1527a6dacacSDimitry Andric 1537a6dacacSDimitry Andric for (auto *BB : PHI->blocks()) { 1547a6dacacSDimitry Andric Builder.SetInsertPoint(BB->getTerminator()); 1557a6dacacSDimitry Andric Value *InsertElt = Builder.CreateInsertElement( 1567a6dacacSDimitry Andric VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0); 1577a6dacacSDimitry Andric VecPHI->addIncoming(InsertElt, BB); 1587a6dacacSDimitry Andric } 1597a6dacacSDimitry Andric 1607a6dacacSDimitry Andric Builder.SetInsertPoint(&I); 1617a6dacacSDimitry Andric I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0)); 1627a6dacacSDimitry Andric 1637a6dacacSDimitry Andric PHI->eraseFromParent(); 164fcaf7f86SDimitry Andric 165fcaf7f86SDimitry Andric return true; 166fcaf7f86SDimitry Andric } 167fcaf7f86SDimitry Andric 1680fca6ea1SDimitry Andric // Always expand zero strided loads so we match more .vx splat patterns, even if 1690fca6ea1SDimitry Andric // we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert 1700fca6ea1SDimitry Andric // it back to a strided load if it's optimized. 
1710fca6ea1SDimitry Andric bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { 1720fca6ea1SDimitry Andric Value *BasePtr, *VL; 1730fca6ea1SDimitry Andric 1740fca6ea1SDimitry Andric using namespace PatternMatch; 1750fca6ea1SDimitry Andric if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>( 1760fca6ea1SDimitry Andric m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL)))) 1770fca6ea1SDimitry Andric return false; 1780fca6ea1SDimitry Andric 1790fca6ea1SDimitry Andric // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so 1800fca6ea1SDimitry Andric // avoid expanding here. 1810fca6ea1SDimitry Andric if (II.getType()->getScalarSizeInBits() > ST->getXLen()) 1820fca6ea1SDimitry Andric return false; 1830fca6ea1SDimitry Andric 1840fca6ea1SDimitry Andric if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II})) 1850fca6ea1SDimitry Andric return false; 1860fca6ea1SDimitry Andric 1870fca6ea1SDimitry Andric auto *VTy = cast<VectorType>(II.getType()); 1880fca6ea1SDimitry Andric 1890fca6ea1SDimitry Andric IRBuilder<> Builder(&II); 1900fca6ea1SDimitry Andric Type *STy = VTy->getElementType(); 1910fca6ea1SDimitry Andric Value *Val = Builder.CreateLoad(STy, BasePtr); 192*62987288SDimitry Andric Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy}, 193*62987288SDimitry Andric {Val, II.getOperand(2), VL}); 1940fca6ea1SDimitry Andric 1950fca6ea1SDimitry Andric II.replaceAllUsesWith(Res); 1960fca6ea1SDimitry Andric II.eraseFromParent(); 1970fca6ea1SDimitry Andric return true; 1980fca6ea1SDimitry Andric } 1990fca6ea1SDimitry Andric 200fcaf7f86SDimitry Andric bool RISCVCodeGenPrepare::runOnFunction(Function &F) { 201fcaf7f86SDimitry Andric if (skipFunction(F)) 202fcaf7f86SDimitry Andric return false; 203fcaf7f86SDimitry Andric 204fcaf7f86SDimitry Andric auto &TPC = getAnalysis<TargetPassConfig>(); 205fcaf7f86SDimitry Andric auto &TM = TPC.getTM<RISCVTargetMachine>(); 206fcaf7f86SDimitry Andric ST = 
&TM.getSubtarget<RISCVSubtarget>(F); 207fcaf7f86SDimitry Andric 2080fca6ea1SDimitry Andric DL = &F.getDataLayout(); 2090fca6ea1SDimitry Andric DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 210fcaf7f86SDimitry Andric 211fcaf7f86SDimitry Andric bool MadeChange = false; 212bdd1243dSDimitry Andric for (auto &BB : F) 213bdd1243dSDimitry Andric for (Instruction &I : llvm::make_early_inc_range(BB)) 214bdd1243dSDimitry Andric MadeChange |= visit(I); 215fcaf7f86SDimitry Andric 216fcaf7f86SDimitry Andric return MadeChange; 217fcaf7f86SDimitry Andric } 218fcaf7f86SDimitry Andric 219fcaf7f86SDimitry Andric INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) 220fcaf7f86SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 221fcaf7f86SDimitry Andric INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) 222fcaf7f86SDimitry Andric 223fcaf7f86SDimitry Andric char RISCVCodeGenPrepare::ID = 0; 224fcaf7f86SDimitry Andric 225fcaf7f86SDimitry Andric FunctionPass *llvm::createRISCVCodeGenPreparePass() { 226fcaf7f86SDimitry Andric return new RISCVCodeGenPrepare(); 227fcaf7f86SDimitry Andric } 228