//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"

namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
// the upper 32 bits with ones.
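//
// For example (illustrative, not taken from a test):
//   %e = zext nneg i32 %x to i64
//   %r = and i64 %e, 4294967280   ; 0xFFFFFFF0, not encodable as an ANDI imm
// becomes
//   %r = and i64 %e, -16          ; simm12, selectable as a single ANDI
// This is safe because bits 63:32 of %e are known zero, so setting them in
// the mask cannot change the result.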
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow use of ANDI.
  // TODO: Is it worth making simm32?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

  // Sign extend the constant and replace the And operand.
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V,
// vector reduction instructions write the result to the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorization in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                    <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                    <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// Which eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

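  // Only handle the simple loop-recurrence shape: a scalar phi whose sole use
  // is this reduction and which receives the reduction result back on one of
  // its incoming edges.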
  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

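  // Rebuild each incoming value as a vector: insert the scalar incoming value
  // into element 0 of a new vector just before the predecessor's terminator.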
  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero strided loads so we match more .vx splat patterns, even if
// we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
// it back to a strided load if the subtarget has optimized zero stride loads.
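//
// For example (illustrative; intrinsic name mangling and the all-ones mask are
// abbreviated):
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load(
//            ptr %p, i64 0, <all-ones mask>, i32 %evl)
// becomes, roughly,
//   %s = load i32, ptr %p
//   %v = call <vscale x 2 x i32> @llvm.riscv.vmv.v.x(
//            <vscale x 2 x i32> poison, i32 %s, i64 %evl.zext)
// i.e. a scalar load followed by a vl-bound splat.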
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so
  // avoid expanding here.
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

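  // The expansion below loads from BasePtr unconditionally, which is only
  // valid if the original intrinsic was guaranteed to access memory, i.e. if
  // EVL is known to be non-zero.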
  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);

  // Extend VL from i32 to XLen if needed.
  if (ST->is64Bit())
    VL = Builder.CreateZExt(VL, Builder.getInt64Ty());

  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  const auto &TLI = *ST->getTargetLowering();
  Value *Res;

  // TODO: Also support fixed/illegal vector types to splat with evl = vl.
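  // For legal scalable types, splat through the vl-bound vmv.v.x / vfmv.v.f
  // intrinsics so the splat only needs to cover EVL elements; otherwise fall
  // back to a plain full-length splat.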
  if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
    unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
                                              : Intrinsic::riscv_vmv_v_x;
    Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
                                  {PoisonValue::get(VTy), Val, VL});
  } else {
    Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
  }

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
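  // Use make_early_inc_range so a visitor may erase the instruction it is
  // currently visiting (expandVPStrideLoad removes the original intrinsic).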
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}