xref: /llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp (revision 9d81ccc02ffb154cd5ee7ade21740dc4a45f4261)
1 //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the WebAssembly-specific TargetTransformInfo
11 /// implementation.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "WebAssemblyTargetTransformInfo.h"
16 #include "llvm/CodeGen/CostTable.h"
17 #include "llvm/Support/Debug.h"
18 using namespace llvm;
19 
20 #define DEBUG_TYPE "wasmtti"
21 
22 TargetTransformInfo::PopcntSupportKind
23 WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
24   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
25   return TargetTransformInfo::PSK_FastHardware;
26 }
27 
28 unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
29   unsigned Result = BaseT::getNumberOfRegisters(ClassID);
30 
31   // For SIMD, use at least 16 registers, as a rough guess.
32   bool Vector = (ClassID == 1);
33   if (Vector)
34     Result = std::max(Result, 16u);
35 
36   return Result;
37 }
38 
39 unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const {
40   if (Vector && getST()->hasSIMD128())
41     return 128;
42 
43   return 64;
44 }
45 
46 unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
47     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
48     TTI::OperandValueKind Opd1Info,
49     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
50     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
51     const Instruction *CxtI) {
52 
53   unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
54       Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
55 
56   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
57     switch (Opcode) {
58     case Instruction::LShr:
59     case Instruction::AShr:
60     case Instruction::Shl:
61       // SIMD128's shifts currently only accept a scalar shift count. For each
62       // element, we'll need to extract, op, insert. The following is a rough
63       // approxmation.
64       if (Opd2Info != TTI::OK_UniformValue &&
65           Opd2Info != TTI::OK_UniformConstantValue)
66         Cost =
67             cast<FixedVectorType>(VTy)->getNumElements() *
68             (TargetTransformInfo::TCC_Basic +
69              getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
70              TargetTransformInfo::TCC_Basic);
71       break;
72     }
73   }
74   return Cost;
75 }
76 
77 unsigned WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
78                                                 unsigned Index) {
79   unsigned Cost = BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
80 
81   // SIMD128's insert/extract currently only take constant indices.
82   if (Index == -1u)
83     return Cost + 25 * TargetTransformInfo::TCC_Expensive;
84 
85   return Cost;
86 }
87 
88 bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
89                                              const Function *Callee) const {
90   // Allow inlining only when the Callee has a subset of the Caller's
91   // features. In principle, we should be able to inline regardless of any
92   // features because WebAssembly supports features at module granularity, not
93   // function granularity, but without this restriction it would be possible for
94   // a module to "forget" about features if all the functions that used them
95   // were inlined.
96   const TargetMachine &TM = getTLI()->getTargetMachine();
97 
98   const FeatureBitset &CallerBits =
99       TM.getSubtargetImpl(*Caller)->getFeatureBits();
100   const FeatureBitset &CalleeBits =
101       TM.getSubtargetImpl(*Callee)->getFeatureBits();
102 
103   return (CallerBits & CalleeBits) == CalleeBits;
104 }
105 
106 void WebAssemblyTTIImpl::getUnrollingPreferences(
107   Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) const {
108   // Scan the loop: don't unroll loops with calls. This is a standard approach
109   // for most (all?) targets.
110   for (BasicBlock *BB : L->blocks())
111     for (Instruction &I : *BB)
112       if (isa<CallInst>(I) || isa<InvokeInst>(I))
113         if (const Function *F = cast<CallBase>(I).getCalledFunction())
114           if (isLoweredToCall(F))
115             return;
116 
117   // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
118   // the various microarchitectures that use the BasicTTI implementation and
119   // has been selected through heuristics across multiple cores and runtimes.
120   UP.Partial = UP.Runtime = UP.UpperBound = true;
121   UP.PartialThreshold = 30;
122 
123   // Avoid unrolling when optimizing for size.
124   UP.OptSizeThreshold = 0;
125   UP.PartialOptSizeThreshold = 0;
126 
127   // Set number of instructions optimized when "back edge"
128   // becomes "fall through" to default value of 2.
129   UP.BEInsns = 2;
130 }
131