xref: /llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp (revision 82d5dd28b4de7245088f7ed40da37f8cf80461e4)
11a8468baSCraig Topper //===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
21a8468baSCraig Topper //
31a8468baSCraig Topper // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41a8468baSCraig Topper // See https://llvm.org/LICENSE.txt for license information.
51a8468baSCraig Topper // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61a8468baSCraig Topper //
71a8468baSCraig Topper //===----------------------------------------------------------------------===//
81a8468baSCraig Topper //
929463612SCraig Topper // This is a RISC-V specific version of CodeGenPrepare.
101a8468baSCraig Topper // It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
131a8468baSCraig Topper //
141a8468baSCraig Topper //===----------------------------------------------------------------------===//
151a8468baSCraig Topper 
161a8468baSCraig Topper #include "RISCV.h"
171a8468baSCraig Topper #include "RISCVTargetMachine.h"
181a8468baSCraig Topper #include "llvm/ADT/Statistic.h"
191a8468baSCraig Topper #include "llvm/Analysis/ValueTracking.h"
201a8468baSCraig Topper #include "llvm/CodeGen/TargetPassConfig.h"
2194279ae4SYeting Kuo #include "llvm/IR/Dominators.h"
2215b0fabbSLuke Lau #include "llvm/IR/IRBuilder.h"
23f19497f7SCraig Topper #include "llvm/IR/InstVisitor.h"
2415b0fabbSLuke Lau #include "llvm/IR/Intrinsics.h"
2593968912SCraig Topper #include "llvm/IR/PatternMatch.h"
261a8468baSCraig Topper #include "llvm/InitializePasses.h"
271a8468baSCraig Topper #include "llvm/Pass.h"
281a8468baSCraig Topper 
291a8468baSCraig Topper using namespace llvm;
301a8468baSCraig Topper 
311a8468baSCraig Topper #define DEBUG_TYPE "riscv-codegenprepare"
320f4c9c01SCraig Topper #define PASS_NAME "RISC-V CodeGenPrepare"
331a8468baSCraig Topper 
341a8468baSCraig Topper namespace {
351a8468baSCraig Topper 
36f19497f7SCraig Topper class RISCVCodeGenPrepare : public FunctionPass,
37f19497f7SCraig Topper                             public InstVisitor<RISCVCodeGenPrepare, bool> {
381a8468baSCraig Topper   const DataLayout *DL;
3994279ae4SYeting Kuo   const DominatorTree *DT;
401a8468baSCraig Topper   const RISCVSubtarget *ST;
411a8468baSCraig Topper 
421a8468baSCraig Topper public:
431a8468baSCraig Topper   static char ID;
441a8468baSCraig Topper 
451a8468baSCraig Topper   RISCVCodeGenPrepare() : FunctionPass(ID) {}
461a8468baSCraig Topper 
471a8468baSCraig Topper   bool runOnFunction(Function &F) override;
481a8468baSCraig Topper 
491a8468baSCraig Topper   StringRef getPassName() const override { return PASS_NAME; }
501a8468baSCraig Topper 
511a8468baSCraig Topper   void getAnalysisUsage(AnalysisUsage &AU) const override {
521a8468baSCraig Topper     AU.setPreservesCFG();
5394279ae4SYeting Kuo     AU.addRequired<DominatorTreeWrapperPass>();
541a8468baSCraig Topper     AU.addRequired<TargetPassConfig>();
551a8468baSCraig Topper   }
561a8468baSCraig Topper 
57f19497f7SCraig Topper   bool visitInstruction(Instruction &I) { return false; }
58f19497f7SCraig Topper   bool visitAnd(BinaryOperator &BO);
5915b0fabbSLuke Lau   bool visitIntrinsicInst(IntrinsicInst &I);
6094279ae4SYeting Kuo   bool expandVPStrideLoad(IntrinsicInst &I);
611a8468baSCraig Topper };
621a8468baSCraig Topper 
631a8468baSCraig Topper } // end anonymous namespace
641a8468baSCraig Topper 
658cc48309SCraig Topper // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
66784a2cd5SPhilip Reames // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
67784a2cd5SPhilip Reames // the upper 32 bits with ones.
68f19497f7SCraig Topper bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
698cc48309SCraig Topper   if (!ST->is64Bit())
708cc48309SCraig Topper     return false;
718cc48309SCraig Topper 
72f19497f7SCraig Topper   if (!BO.getType()->isIntegerTy(64))
738cc48309SCraig Topper     return false;
748cc48309SCraig Topper 
7593968912SCraig Topper   using namespace PatternMatch;
76784a2cd5SPhilip Reames 
7793968912SCraig Topper   // Left hand side should be a zext nneg.
7893968912SCraig Topper   Value *LHSSrc;
7993968912SCraig Topper   if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
808cc48309SCraig Topper     return false;
818cc48309SCraig Topper 
828cc48309SCraig Topper   if (!LHSSrc->getType()->isIntegerTy(32))
838cc48309SCraig Topper     return false;
848cc48309SCraig Topper 
858cc48309SCraig Topper   // Right hand side should be a constant.
86f19497f7SCraig Topper   Value *RHS = BO.getOperand(1);
878cc48309SCraig Topper 
888cc48309SCraig Topper   auto *CI = dyn_cast<ConstantInt>(RHS);
898cc48309SCraig Topper   if (!CI)
908cc48309SCraig Topper     return false;
918cc48309SCraig Topper   uint64_t C = CI->getZExtValue();
928cc48309SCraig Topper 
938cc48309SCraig Topper   // Look for constants that fit in 32 bits but not simm12, and can be made
948cc48309SCraig Topper   // into simm12 by sign extending bit 31. This will allow use of ANDI.
958cc48309SCraig Topper   // TODO: Is worth making simm32?
968cc48309SCraig Topper   if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
978cc48309SCraig Topper     return false;
988cc48309SCraig Topper 
998cc48309SCraig Topper   // Sign extend the constant and replace the And operand.
1008cc48309SCraig Topper   C = SignExtend64<32>(C);
10193968912SCraig Topper   BO.setOperand(1, ConstantInt::get(RHS->getType(), C));
1028cc48309SCraig Topper 
1038cc48309SCraig Topper   return true;
1048cc48309SCraig Topper }
1058cc48309SCraig Topper 
10615b0fabbSLuke Lau // LLVM vector reduction intrinsics return a scalar result, but on RISC-V vector
10715b0fabbSLuke Lau // reduction instructions write the result in the first element of a vector
10815b0fabbSLuke Lau // register. So when a reduction in a loop uses a scalar phi, we end up with
10915b0fabbSLuke Lau // unnecessary scalar moves:
11015b0fabbSLuke Lau //
11115b0fabbSLuke Lau // loop:
11215b0fabbSLuke Lau // vfmv.s.f v10, fa0
11315b0fabbSLuke Lau // vfredosum.vs v8, v8, v10
11415b0fabbSLuke Lau // vfmv.f.s fa0, v8
11515b0fabbSLuke Lau //
11615b0fabbSLuke Lau // This mainly affects ordered fadd reductions, since other types of reduction
11715b0fabbSLuke Lau // typically use element-wise vectorisation in the loop body. This tries to
11815b0fabbSLuke Lau // vectorize any scalar phis that feed into a fadd reduction:
11915b0fabbSLuke Lau //
12015b0fabbSLuke Lau // loop:
12115b0fabbSLuke Lau // %phi = phi <float> [ ..., %entry ], [ %acc, %loop ]
122f0ac8903SPiotr Fusik // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
123f0ac8903SPiotr Fusik //                                                    <vscale x 2 x float> %vec)
12415b0fabbSLuke Lau //
12515b0fabbSLuke Lau // ->
12615b0fabbSLuke Lau //
12715b0fabbSLuke Lau // loop:
12815b0fabbSLuke Lau // %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
12915b0fabbSLuke Lau // %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
130f0ac8903SPiotr Fusik // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %x,
131f0ac8903SPiotr Fusik //                                                    <vscale x 2 x float> %vec)
13215b0fabbSLuke Lau // %acc.vec = insertelement <vscale x 2 x float> poison, float %acc.next, i64 0
13315b0fabbSLuke Lau //
13415b0fabbSLuke Lau // Which eliminates the scalar -> vector -> scalar crossing during instruction
13515b0fabbSLuke Lau // selection.
13615b0fabbSLuke Lau bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
13794279ae4SYeting Kuo   if (expandVPStrideLoad(I))
13894279ae4SYeting Kuo     return true;
13994279ae4SYeting Kuo 
14015b0fabbSLuke Lau   if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
14115b0fabbSLuke Lau     return false;
14215b0fabbSLuke Lau 
14315b0fabbSLuke Lau   auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
14415b0fabbSLuke Lau   if (!PHI || !PHI->hasOneUse() ||
14515b0fabbSLuke Lau       !llvm::is_contained(PHI->incoming_values(), &I))
14615b0fabbSLuke Lau     return false;
14715b0fabbSLuke Lau 
14815b0fabbSLuke Lau   Type *VecTy = I.getOperand(1)->getType();
14915b0fabbSLuke Lau   IRBuilder<> Builder(PHI);
15015b0fabbSLuke Lau   auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());
15115b0fabbSLuke Lau 
15215b0fabbSLuke Lau   for (auto *BB : PHI->blocks()) {
15315b0fabbSLuke Lau     Builder.SetInsertPoint(BB->getTerminator());
15415b0fabbSLuke Lau     Value *InsertElt = Builder.CreateInsertElement(
15515b0fabbSLuke Lau         VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
15615b0fabbSLuke Lau     VecPHI->addIncoming(InsertElt, BB);
15715b0fabbSLuke Lau   }
15815b0fabbSLuke Lau 
15915b0fabbSLuke Lau   Builder.SetInsertPoint(&I);
16015b0fabbSLuke Lau   I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));
16115b0fabbSLuke Lau 
16215b0fabbSLuke Lau   PHI->eraseFromParent();
16315b0fabbSLuke Lau 
16415b0fabbSLuke Lau   return true;
16515b0fabbSLuke Lau }
16615b0fabbSLuke Lau 
167d5f4f084SLuke Lau // Always expand zero strided loads so we match more .vx splat patterns, even if
168d5f4f084SLuke Lau // we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
169d5f4f084SLuke Lau // it back to a strided load if it's optimized.
17094279ae4SYeting Kuo bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
17194279ae4SYeting Kuo   Value *BasePtr, *VL;
17294279ae4SYeting Kuo 
17394279ae4SYeting Kuo   using namespace PatternMatch;
17494279ae4SYeting Kuo   if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
17594279ae4SYeting Kuo                       m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
17694279ae4SYeting Kuo     return false;
17794279ae4SYeting Kuo 
178563ae620SLuke Lau   // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so
179563ae620SLuke Lau   // avoid expanding here.
180563ae620SLuke Lau   if (II.getType()->getScalarSizeInBits() > ST->getXLen())
181563ae620SLuke Lau     return false;
182563ae620SLuke Lau 
18394279ae4SYeting Kuo   if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
18494279ae4SYeting Kuo     return false;
18594279ae4SYeting Kuo 
18694279ae4SYeting Kuo   auto *VTy = cast<VectorType>(II.getType());
18794279ae4SYeting Kuo 
18894279ae4SYeting Kuo   IRBuilder<> Builder(&II);
18994279ae4SYeting Kuo   Type *STy = VTy->getElementType();
19094279ae4SYeting Kuo   Value *Val = Builder.CreateLoad(STy, BasePtr);
191*87af9ee8SYeting Kuo   Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
192*87af9ee8SYeting Kuo                                        {Val, II.getOperand(2), VL});
19394279ae4SYeting Kuo 
19494279ae4SYeting Kuo   II.replaceAllUsesWith(Res);
19594279ae4SYeting Kuo   II.eraseFromParent();
19694279ae4SYeting Kuo   return true;
19794279ae4SYeting Kuo }
19894279ae4SYeting Kuo 
1991a8468baSCraig Topper bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
2001a8468baSCraig Topper   if (skipFunction(F))
2011a8468baSCraig Topper     return false;
2021a8468baSCraig Topper 
2031a8468baSCraig Topper   auto &TPC = getAnalysis<TargetPassConfig>();
2041a8468baSCraig Topper   auto &TM = TPC.getTM<RISCVTargetMachine>();
2051a8468baSCraig Topper   ST = &TM.getSubtarget<RISCVSubtarget>(F);
2061a8468baSCraig Topper 
2079df71d76SNikita Popov   DL = &F.getDataLayout();
20894279ae4SYeting Kuo   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2091a8468baSCraig Topper 
2101a8468baSCraig Topper   bool MadeChange = false;
211f19497f7SCraig Topper   for (auto &BB : F)
212f19497f7SCraig Topper     for (Instruction &I : llvm::make_early_inc_range(BB))
213f19497f7SCraig Topper       MadeChange |= visit(I);
2141a8468baSCraig Topper 
2151a8468baSCraig Topper   return MadeChange;
2161a8468baSCraig Topper }
2171a8468baSCraig Topper 
2181a8468baSCraig Topper INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
2191a8468baSCraig Topper INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
2201a8468baSCraig Topper INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
2211a8468baSCraig Topper 
2221a8468baSCraig Topper char RISCVCodeGenPrepare::ID = 0;
2231a8468baSCraig Topper 
2241a8468baSCraig Topper FunctionPass *llvm::createRISCVCodeGenPreparePass() {
2251a8468baSCraig Topper   return new RISCVCodeGenPrepare();
2261a8468baSCraig Topper }
227