//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"

namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext nneg (i32 X)), C1)) if C1 has bit 31 set
// but bits 63:32 are zero. Because the zext is nneg, bit 31 of X is known to
// be 0, so we can sign extend the constant, filling the upper 32 bits with
// ones, without changing the result.
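//
// For example (a sketch; the constant is illustrative):
//
//   %ext = zext nneg i32 %x to i64
//   %res = and i64 %ext, 4294967040   ; 0xFFFFFF00, does not fit in a simm12
//
// can be rewritten as
//
//   %res = and i64 %ext, -256         ; simm12, selectable as ANDI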
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow use of ANDI.
  // TODO: Is it worth doing this for simm32 as well?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

  // Sign extend the constant and replace the And operand.
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V,
// vector reduction instructions write the result to the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorisation in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                    <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                    <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// This eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

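  // The reduction's start value must be a scalar phi that is only used by
  // this reduction and that receives the reduction result back on one of its
  // incoming edges (the loop back edge).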
  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

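  // For each predecessor, insert the scalar incoming value into element 0 of
  // a poison vector at the end of that block and use the result as the
  // corresponding incoming value of the new vector phi.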
  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

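  // Take the reduction's start value from element 0 of the vector phi and
  // drop the now dead scalar phi.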
  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero-stride loads so we match more .vx splat patterns, even if
// we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
// it back to a strided load if the subtarget has optimized zero-stride loads.
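//
// For example (a sketch; the intrinsic mangling and types are illustrative):
//
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(
//            ptr %p, i64 0, <vscale x 2 x i1> splat (i1 true), i32 %evl)
//
// with %evl known to be non-zero becomes
//
//   %s = load i32, ptr %p
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(
//            i32 %s, <vscale x 2 x i1> splat (i1 true), i32 %evl)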
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW>XLEN then a splat will get lowered as a zero-stride load anyway, so
  // avoid expanding here.
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

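  // The expansion below performs an unconditional scalar load, which is only
  // safe if the original intrinsic was guaranteed to access memory, i.e. if
  // VL is known to be non-zero.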
  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

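  // Load the first element as a scalar and splat it with vp.splat, reusing
  // the original (all-ones) mask and VL.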
  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);
  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
                                       {Val, II.getOperand(2), VL});

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}