//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
using namespace llvm;

#define DEBUG_TYPE "wasmtti"

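// WebAssembly has full-width popcount instructions (i32.popcnt / i64.popcnt),
// so population count is always reported as fast hardware support.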
TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return TargetTransformInfo::PSK_FastHardware;
}

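// In the default TTI register-class numbering, class 0 holds scalars and
// class 1 holds vectors. WebAssembly has no fixed physical register file, so
// the returned counts only feed register-pressure heuristics.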
unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

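// Scalar values are modeled as at most 64 bits wide; fixed-width vectors are
// 128 bits when the simd128 feature is available. Scalable vectors are not
// supported.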
TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

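// Start from the generic arithmetic cost, then charge extra for vector shifts
// whose shift amount is not a uniform value, since SIMD128 shifts only take a
// single scalar shift count.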
InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
      if (!Op2Info.isUniform())
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}

InstructionCost
WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       TTI::TargetCostKind CostKind,
                                       unsigned Index, Value *Op0, Value *Op1) {
  InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
      Opcode, Val, CostKind, Index, Op0, Op1);

  // SIMD128's insert/extract currently only take constant indices.
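  // An Index of -1 means the lane is not a compile-time constant, so charge a
  // large penalty on top of the base cost.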
  if (Index == -1u)
    return Cost + 25 * TargetTransformInfo::TCC_Expensive;

  return Cost;
}

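// Pick the shuffle pattern used when expanding vector reductions: fadd
// reductions are expanded pairwise, everything else with the default
// split-in-half shuffles.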
TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
    const IntrinsicInst *II) const {

  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vector_reduce_fadd:
    return TTI::ReductionShuffle::Pairwise;
  }
  return TTI::ReductionShuffle::SplitHalf;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set number of instructions optimized when "back edge"
  // becomes "fall through" to default value of 2.
  UP.BEInsns = 2;
}

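// Tail calls are only available when the subtarget enables the WebAssembly
// tail-call feature.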
bool WebAssemblyTTIImpl::supportsTailCalls() const {
  return getST()->hasTailCall();
}

bool WebAssemblyTTIImpl::isProfitableToSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !I->isShift())
    return false;

  Value *V = I->getOperand(1);
  // We don't need to sink a constant splat.
  if (isa<Constant>(V))
    return false;

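  // A non-constant splat shift amount (insertelement + shufflevector with an
  // all-zeros mask) is sunk next to the shift so that instruction selection,
  // which works one basic block at a time, can still see the splat and use its
  // scalar operand directly as the SIMD shift count.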
  if (match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                         m_Value(), m_ZeroMask()))) {
    // Sink insert
    Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
    // Sink shuffle
    Ops.push_back(&I->getOperandUse(1));
    return true;
  }

  return false;
}
161