//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
}

// For controlling conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
}

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to fail
  // if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
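
  // How this implementation is typically reached (a rough sketch; the actual
  // wiring lives in the ARM target machine, not in this header):
  //
  //   TargetTransformInfo
  //   ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
  //     return TargetTransformInfo(ARMTTIImpl(this, F));
  //   }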

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
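  // A rough sketch of the intended check (the precise implementation lives in
  // ARMTargetTransformInfo.cpp): subtarget features *not* listed in
  // InlineFeaturesAllowed must match exactly between caller and callee, while
  // for the listed features the callee may only rely on what the caller also
  // has. Conceptually:
  //
  //   CallerBits = TM.getSubtargetImpl(*Caller)->getFeatureBits();
  //   CalleeBits = TM.getSubtargetImpl(*Callee)->getFeatureBits();
  //   return (CallerBits & ~InlineFeaturesAllowed) ==
  //              (CalleeBits & ~InlineFeaturesAllowed) &&
  //          (CalleeBits & InlineFeaturesAllowed) ==
  //              (CalleeBits & CallerBits & InlineFeaturesAllowed);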

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  /// \name Scalar TTI Implementations
  /// @{

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      // NEON provides 16 128-bit Q registers; MVE provides 8.
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    // Thumb1 code can freely use only the low registers r0-r7; otherwise
    // r0-r12 are available.
    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             bool IsPairwiseForm,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
                                              Type *ResTy, VectorType *ValTy,
                                              TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                   ScalarEvolution &SE,
                                   AssumptionCache &AC,
                                   TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  bool emitGetActiveLaneMask() const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }
  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
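///
/// For example, a VREV64 of a <8 x i8> vector reverses the elements within
/// each 64-bit block, so isVREVMask(M, MVT::v8i8, 64) returns true for the
/// mask <7,6,5,4,3,2,1,0>; for a <16 x i8> vector the matching mask is
/// <7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8>.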
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H