10b57cec5SDimitry Andric //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the AArch64 specific subclass of TargetSubtarget. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "AArch64Subtarget.h" 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AArch64.h" 160b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h" 180b57cec5SDimitry Andric #include "AArch64TargetMachine.h" 195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h" 205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h" 215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h" 220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 2481ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h" 260b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 270fca6ea1SDimitry Andric #include "llvm/Support/SipHash.h" 2806c3fb27SDimitry Andric #include "llvm/TargetParser/AArch64TargetParser.h" 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric using namespace llvm; 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget" 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR 350b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC 360b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc" 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric static cl::opt<bool> 390b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " 400b57cec5SDimitry Andric "converter pass"), cl::init(true), cl::Hidden); 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it. 430b57cec5SDimitry Andric static cl::opt<bool> 440b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " 450b57cec5SDimitry Andric "an address is ignored"), cl::init(false), cl::Hidden); 460b57cec5SDimitry Andric 470fca6ea1SDimitry Andric static cl::opt<bool> MachOUseNonLazyBind( 480fca6ea1SDimitry Andric "aarch64-macho-enable-nonlazybind", 490fca6ea1SDimitry Andric cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"), 500fca6ea1SDimitry Andric cl::Hidden); 510b57cec5SDimitry Andric 52fe6060f1SDimitry Andric static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true), 53fe6060f1SDimitry Andric cl::desc("Enable the use of AA during codegen.")); 545ffd83dbSDimitry Andric 5581ad6265SDimitry Andric static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost( 5681ad6265SDimitry Andric "aarch64-insert-extract-base-cost", 5781ad6265SDimitry Andric cl::desc("Base cost of vector insert/extract element"), cl::Hidden); 5881ad6265SDimitry Andric 59bdd1243dSDimitry Andric // Reserve a list of X# registers, so they are unavailable for register 60bdd1243dSDimitry Andric // allocator, but can still be used as ABI requests, such as passing arguments 61bdd1243dSDimitry Andric // to function call. 62bdd1243dSDimitry Andric static cl::list<std::string> 63bdd1243dSDimitry Andric ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical " 64bdd1243dSDimitry Andric "registers, so they can't be used by register allocator. " 65bdd1243dSDimitry Andric "Should only be used for testing register allocator."), 66bdd1243dSDimitry Andric cl::CommaSeparated, cl::Hidden); 67bdd1243dSDimitry Andric 685f757f3fSDimitry Andric static cl::opt<AArch64PAuth::AuthCheckMethod> 695f757f3fSDimitry Andric AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method", 705f757f3fSDimitry Andric cl::Hidden, 715f757f3fSDimitry Andric cl::desc("Override the variant of check applied " 725f757f3fSDimitry Andric "to authenticated LR during tail call"), 735f757f3fSDimitry Andric cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR)); 745f757f3fSDimitry Andric 755f757f3fSDimitry Andric static cl::opt<unsigned> AArch64MinimumJumpTableEntries( 765f757f3fSDimitry Andric "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden, 775f757f3fSDimitry Andric cl::desc("Set minimum number of entries to use a jump table on AArch64")); 785f757f3fSDimitry Andric 7981ad6265SDimitry Andric unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { 8081ad6265SDimitry Andric if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) 8181ad6265SDimitry Andric return OverrideVectorInsertExtractBaseCost; 8281ad6265SDimitry Andric return VectorInsertExtractBaseCost; 8381ad6265SDimitry Andric } 8481ad6265SDimitry Andric 85349cc55cSDimitry Andric AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( 865f757f3fSDimitry Andric StringRef FS, StringRef CPUString, StringRef TuneCPUString, 875f757f3fSDimitry Andric bool HasMinSize) { 880b57cec5SDimitry Andric // Determine default and user-specified characteristics 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric if (CPUString.empty()) 910b57cec5SDimitry Andric CPUString = "generic"; 920b57cec5SDimitry Andric 93349cc55cSDimitry Andric if (TuneCPUString.empty()) 94349cc55cSDimitry Andric TuneCPUString = CPUString; 95349cc55cSDimitry Andric 96349cc55cSDimitry Andric ParseSubtargetFeatures(CPUString, TuneCPUString, FS); 975f757f3fSDimitry Andric initializeProperties(HasMinSize); 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric return *this; 1000b57cec5SDimitry Andric } 1010b57cec5SDimitry Andric 1025f757f3fSDimitry Andric void AArch64Subtarget::initializeProperties(bool HasMinSize) { 1030b57cec5SDimitry Andric // Initialize CPU specific properties. We should add a tablegen feature for 1040b57cec5SDimitry Andric // this in the future so we can specify it together with the subtarget 1050b57cec5SDimitry Andric // features. 1060b57cec5SDimitry Andric switch (ARMProcFamily) { 1070b57cec5SDimitry Andric case Others: 1080b57cec5SDimitry Andric break; 1095ffd83dbSDimitry Andric case Carmel: 1105ffd83dbSDimitry Andric CacheLineSize = 64; 1115ffd83dbSDimitry Andric break; 1120b57cec5SDimitry Andric case CortexA35: 1130b57cec5SDimitry Andric case CortexA53: 1140b57cec5SDimitry Andric case CortexA55: 1150fca6ea1SDimitry Andric case CortexR82: 1160fca6ea1SDimitry Andric case CortexR82AE: 11706c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 11806c3fb27SDimitry Andric PrefLoopAlignment = Align(16); 11981ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 1200b57cec5SDimitry Andric break; 1210b57cec5SDimitry Andric case CortexA57: 1220b57cec5SDimitry Andric MaxInterleaveFactor = 4; 12306c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 12406c3fb27SDimitry Andric PrefLoopAlignment = Align(16); 12581ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 1268bcb0991SDimitry Andric break; 1278bcb0991SDimitry Andric case CortexA65: 12806c3fb27SDimitry Andric PrefFunctionAlignment = Align(8); 1290b57cec5SDimitry Andric break; 1300b57cec5SDimitry Andric case CortexA72: 1310b57cec5SDimitry Andric case CortexA73: 1320b57cec5SDimitry Andric case CortexA75: 13306c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 13406c3fb27SDimitry Andric PrefLoopAlignment = Align(16); 13581ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 13681ad6265SDimitry Andric break; 1370b57cec5SDimitry Andric case CortexA76: 1385ffd83dbSDimitry Andric case CortexA77: 1395ffd83dbSDimitry Andric case CortexA78: 1400fca6ea1SDimitry Andric case CortexA78AE: 141e8d8bef9SDimitry Andric case CortexA78C: 1425ffd83dbSDimitry Andric case CortexX1: 14306c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 14406c3fb27SDimitry Andric PrefLoopAlignment = Align(32); 14581ad6265SDimitry Andric MaxBytesForLoopAlignment = 16; 1460b57cec5SDimitry Andric break; 147349cc55cSDimitry Andric case CortexA510: 1485f757f3fSDimitry Andric case CortexA520: 14906c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 15081ad6265SDimitry Andric VScaleForTuning = 1; 15106c3fb27SDimitry Andric PrefLoopAlignment = Align(16); 15281ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 15381ad6265SDimitry Andric break; 154349cc55cSDimitry Andric case CortexA710: 155bdd1243dSDimitry Andric case CortexA715: 1565f757f3fSDimitry Andric case CortexA720: 1570fca6ea1SDimitry Andric case CortexA725: 158349cc55cSDimitry Andric case CortexX2: 159bdd1243dSDimitry Andric case CortexX3: 1605f757f3fSDimitry Andric case CortexX4: 1610fca6ea1SDimitry Andric case CortexX925: 16206c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 163349cc55cSDimitry Andric VScaleForTuning = 1; 16406c3fb27SDimitry Andric PrefLoopAlignment = Align(32); 16581ad6265SDimitry Andric MaxBytesForLoopAlignment = 16; 166349cc55cSDimitry Andric break; 1675ffd83dbSDimitry Andric case A64FX: 1685ffd83dbSDimitry Andric CacheLineSize = 256; 16906c3fb27SDimitry Andric PrefFunctionAlignment = Align(8); 17006c3fb27SDimitry Andric PrefLoopAlignment = Align(4); 171e8d8bef9SDimitry Andric MaxInterleaveFactor = 4; 172e8d8bef9SDimitry Andric PrefetchDistance = 128; 173e8d8bef9SDimitry Andric MinPrefetchStride = 1024; 174e8d8bef9SDimitry Andric MaxPrefetchIterationsAhead = 4; 175349cc55cSDimitry Andric VScaleForTuning = 4; 1765ffd83dbSDimitry Andric break; 177480093f4SDimitry Andric case AppleA7: 178480093f4SDimitry Andric case AppleA10: 179480093f4SDimitry Andric case AppleA11: 180480093f4SDimitry Andric case AppleA12: 181480093f4SDimitry Andric case AppleA13: 182e8d8bef9SDimitry Andric case AppleA14: 183bdd1243dSDimitry Andric case AppleA15: 184bdd1243dSDimitry Andric case AppleA16: 1855f757f3fSDimitry Andric case AppleA17: 1860fca6ea1SDimitry Andric case AppleM4: 1870b57cec5SDimitry Andric CacheLineSize = 64; 1880b57cec5SDimitry Andric PrefetchDistance = 280; 1890b57cec5SDimitry Andric MinPrefetchStride = 2048; 1900b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 3; 191bdd1243dSDimitry Andric switch (ARMProcFamily) { 192bdd1243dSDimitry Andric case AppleA14: 193bdd1243dSDimitry Andric case AppleA15: 194bdd1243dSDimitry Andric case AppleA16: 1955f757f3fSDimitry Andric case AppleA17: 1960fca6ea1SDimitry Andric case AppleM4: 197bdd1243dSDimitry Andric MaxInterleaveFactor = 4; 198bdd1243dSDimitry Andric break; 199bdd1243dSDimitry Andric default: 200bdd1243dSDimitry Andric break; 201bdd1243dSDimitry Andric } 2020b57cec5SDimitry Andric break; 2030b57cec5SDimitry Andric case ExynosM3: 2040b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2050b57cec5SDimitry Andric MaxJumpTableSize = 20; 20606c3fb27SDimitry Andric PrefFunctionAlignment = Align(32); 20706c3fb27SDimitry Andric PrefLoopAlignment = Align(16); 2080b57cec5SDimitry Andric break; 2090b57cec5SDimitry Andric case Falkor: 2100b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2110b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2120b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2130b57cec5SDimitry Andric CacheLineSize = 128; 2140b57cec5SDimitry Andric PrefetchDistance = 820; 2150b57cec5SDimitry Andric MinPrefetchStride = 2048; 2160b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 8; 2170b57cec5SDimitry Andric break; 2180b57cec5SDimitry Andric case Kryo: 2190b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2200b57cec5SDimitry Andric VectorInsertExtractBaseCost = 2; 2210b57cec5SDimitry Andric CacheLineSize = 128; 2220b57cec5SDimitry Andric PrefetchDistance = 740; 2230b57cec5SDimitry Andric MinPrefetchStride = 1024; 2240b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 11; 2250b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2260b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2270b57cec5SDimitry Andric break; 2288bcb0991SDimitry Andric case NeoverseE1: 22906c3fb27SDimitry Andric PrefFunctionAlignment = Align(8); 2308bcb0991SDimitry Andric break; 2318bcb0991SDimitry Andric case NeoverseN1: 23206c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 23306c3fb27SDimitry Andric PrefLoopAlignment = Align(32); 23404eeddc0SDimitry Andric MaxBytesForLoopAlignment = 16; 235349cc55cSDimitry Andric break; 236e8d8bef9SDimitry Andric case NeoverseN2: 2370fca6ea1SDimitry Andric case NeoverseN3: 238bdd1243dSDimitry Andric case NeoverseV2: 2390fca6ea1SDimitry Andric case NeoverseV3: 24006c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 24106c3fb27SDimitry Andric PrefLoopAlignment = Align(32); 24204eeddc0SDimitry Andric MaxBytesForLoopAlignment = 16; 243349cc55cSDimitry Andric VScaleForTuning = 1; 244349cc55cSDimitry Andric break; 245e8d8bef9SDimitry Andric case NeoverseV1: 24606c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 24706c3fb27SDimitry Andric PrefLoopAlignment = Align(32); 24804eeddc0SDimitry Andric MaxBytesForLoopAlignment = 16; 249349cc55cSDimitry Andric VScaleForTuning = 2; 25006c3fb27SDimitry Andric DefaultSVETFOpts = TailFoldingOpts::Simple; 251349cc55cSDimitry Andric break; 252349cc55cSDimitry Andric case Neoverse512TVB: 25306c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 254349cc55cSDimitry Andric VScaleForTuning = 1; 255349cc55cSDimitry Andric MaxInterleaveFactor = 4; 2568bcb0991SDimitry Andric break; 2570b57cec5SDimitry Andric case Saphira: 2580b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2590b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2600b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2610b57cec5SDimitry Andric break; 2620b57cec5SDimitry Andric case ThunderX2T99: 2630b57cec5SDimitry Andric CacheLineSize = 64; 26406c3fb27SDimitry Andric PrefFunctionAlignment = Align(8); 26506c3fb27SDimitry Andric PrefLoopAlignment = Align(4); 2660b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2670b57cec5SDimitry Andric PrefetchDistance = 128; 2680b57cec5SDimitry Andric MinPrefetchStride = 1024; 2690b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 4; 2700b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2710b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2720b57cec5SDimitry Andric break; 2730b57cec5SDimitry Andric case ThunderX: 2740b57cec5SDimitry Andric case ThunderXT88: 2750b57cec5SDimitry Andric case ThunderXT81: 2760b57cec5SDimitry Andric case ThunderXT83: 2770b57cec5SDimitry Andric CacheLineSize = 128; 27806c3fb27SDimitry Andric PrefFunctionAlignment = Align(8); 27906c3fb27SDimitry Andric PrefLoopAlignment = Align(4); 2800b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2810b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2820b57cec5SDimitry Andric break; 2830b57cec5SDimitry Andric case TSV110: 2840b57cec5SDimitry Andric CacheLineSize = 64; 28506c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 28606c3fb27SDimitry Andric PrefLoopAlignment = Align(4); 2870b57cec5SDimitry Andric break; 288e837bb5cSDimitry Andric case ThunderX3T110: 289e837bb5cSDimitry Andric CacheLineSize = 64; 29006c3fb27SDimitry Andric PrefFunctionAlignment = Align(16); 29106c3fb27SDimitry Andric PrefLoopAlignment = Align(4); 292e837bb5cSDimitry Andric MaxInterleaveFactor = 4; 293e837bb5cSDimitry Andric PrefetchDistance = 128; 294e837bb5cSDimitry Andric MinPrefetchStride = 1024; 295e837bb5cSDimitry Andric MaxPrefetchIterationsAhead = 4; 296e837bb5cSDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 297e837bb5cSDimitry Andric MinVectorRegisterBitWidth = 128; 298e837bb5cSDimitry Andric break; 2992a66634dSDimitry Andric case Ampere1: 300bdd1243dSDimitry Andric case Ampere1A: 3014c2d3b02SDimitry Andric case Ampere1B: 3022a66634dSDimitry Andric CacheLineSize = 64; 30306c3fb27SDimitry Andric PrefFunctionAlignment = Align(64); 30406c3fb27SDimitry Andric PrefLoopAlignment = Align(64); 3052a66634dSDimitry Andric MaxInterleaveFactor = 4; 3062a66634dSDimitry Andric break; 3070fca6ea1SDimitry Andric case Oryon: 3080fca6ea1SDimitry Andric CacheLineSize = 64; 3090fca6ea1SDimitry Andric PrefFunctionAlignment = Align(16); 3100fca6ea1SDimitry Andric MaxInterleaveFactor = 4; 3110fca6ea1SDimitry Andric PrefetchDistance = 128; 3120fca6ea1SDimitry Andric MinPrefetchStride = 1024; 3130fca6ea1SDimitry Andric break; 3140b57cec5SDimitry Andric } 3155f757f3fSDimitry Andric 3165f757f3fSDimitry Andric if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize) 3175f757f3fSDimitry Andric MinimumJumpTableEntries = AArch64MinimumJumpTableEntries; 3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric 320bdd1243dSDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, 321bdd1243dSDimitry Andric StringRef TuneCPU, StringRef FS, 322fe6060f1SDimitry Andric const TargetMachine &TM, bool LittleEndian, 323fe6060f1SDimitry Andric unsigned MinSVEVectorSizeInBitsOverride, 324bdd1243dSDimitry Andric unsigned MaxSVEVectorSizeInBitsOverride, 3250fca6ea1SDimitry Andric bool IsStreaming, bool IsStreamingCompatible, 3265f757f3fSDimitry Andric bool HasMinSize) 327349cc55cSDimitry Andric : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS), 3280b57cec5SDimitry Andric ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), 329bdd1243dSDimitry Andric ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()), 3300b57cec5SDimitry Andric CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), 3310fca6ea1SDimitry Andric IsLittle(LittleEndian), IsStreaming(IsStreaming), 3320fca6ea1SDimitry Andric IsStreamingCompatible(IsStreamingCompatible), 333fe6060f1SDimitry Andric MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), 334fe6060f1SDimitry Andric MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), 3355f757f3fSDimitry Andric InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)), 336349cc55cSDimitry Andric TLInfo(TM, *this) { 3370b57cec5SDimitry Andric if (AArch64::isX18ReservedByDefault(TT)) 3380b57cec5SDimitry Andric ReserveXRegister.set(18); 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); 3415ffd83dbSDimitry Andric InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering())); 3420b57cec5SDimitry Andric Legalizer.reset(new AArch64LegalizerInfo(*this)); 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric // FIXME: At this point, we can't rely on Subtarget having RBI. 3470b57cec5SDimitry Andric // It's awkward to mix passing RBI and the Subtarget; should we pass 3480b57cec5SDimitry Andric // TII/TRI as well? 3490b57cec5SDimitry Andric InstSelector.reset(createAArch64InstructionSelector( 3500b57cec5SDimitry Andric *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI)); 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric RegBankInfo.reset(RBI); 353bdd1243dSDimitry Andric 354bdd1243dSDimitry Andric auto TRI = getRegisterInfo(); 355bdd1243dSDimitry Andric StringSet<> ReservedRegNames; 356bdd1243dSDimitry Andric ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end()); 357bdd1243dSDimitry Andric for (unsigned i = 0; i < 29; ++i) { 358bdd1243dSDimitry Andric if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i))) 359bdd1243dSDimitry Andric ReserveXRegisterForRA.set(i); 360bdd1243dSDimitry Andric } 361bdd1243dSDimitry Andric // X30 is named LR, so we can't use TRI->getName to check X30. 362bdd1243dSDimitry Andric if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR")) 363bdd1243dSDimitry Andric ReserveXRegisterForRA.set(30); 364bdd1243dSDimitry Andric // X29 is named FP, so we can't use TRI->getName to check X29. 365bdd1243dSDimitry Andric if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP")) 366bdd1243dSDimitry Andric ReserveXRegisterForRA.set(29); 3675f757f3fSDimitry Andric 3685f757f3fSDimitry Andric AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM)); 3690b57cec5SDimitry Andric } 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const { 3720b57cec5SDimitry Andric return CallLoweringInfo.get(); 3730b57cec5SDimitry Andric } 3740b57cec5SDimitry Andric 3755ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const { 3765ffd83dbSDimitry Andric return InlineAsmLoweringInfo.get(); 3775ffd83dbSDimitry Andric } 3785ffd83dbSDimitry Andric 3798bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const { 3800b57cec5SDimitry Andric return InstSelector.get(); 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const { 3840b57cec5SDimitry Andric return Legalizer.get(); 3850b57cec5SDimitry Andric } 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { 3880b57cec5SDimitry Andric return RegBankInfo.get(); 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric 3910b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be 3920b57cec5SDimitry Andric /// referenced for the current subtarget. 3938bcb0991SDimitry Andric unsigned 3940b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, 3950b57cec5SDimitry Andric const TargetMachine &TM) const { 3960b57cec5SDimitry Andric // MachO large model always goes via a GOT, simply to get a single 8-byte 3970b57cec5SDimitry Andric // absolute relocation on all global addresses. 3980b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) 3990b57cec5SDimitry Andric return AArch64II::MO_GOT; 4000b57cec5SDimitry Andric 40106c3fb27SDimitry Andric // All globals dynamically protected by MTE must have their address tags 40206c3fb27SDimitry Andric // synthesized. This is done by having the loader stash the tag in the GOT 40306c3fb27SDimitry Andric // entry. Force all tagged globals (even ones with internal linkage) through 40406c3fb27SDimitry Andric // the GOT. 40506c3fb27SDimitry Andric if (GV->isTagged()) 40606c3fb27SDimitry Andric return AArch64II::MO_GOT; 40706c3fb27SDimitry Andric 4080fca6ea1SDimitry Andric if (!TM.shouldAssumeDSOLocal(GV)) { 409bdd1243dSDimitry Andric if (GV->hasDLLImportStorageClass()) { 4100b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT; 411bdd1243dSDimitry Andric } 4120b57cec5SDimitry Andric if (getTargetTriple().isOSWindows()) 4130b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB; 4140b57cec5SDimitry Andric return AArch64II::MO_GOT; 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric // The small code model's direct accesses use ADRP, which cannot 4180b57cec5SDimitry Andric // necessarily produce the value 0 (if the code is above 4GB). 4190b57cec5SDimitry Andric // Same for the tiny code model, where we have a pc relative LDR. 4200b57cec5SDimitry Andric if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) && 4210b57cec5SDimitry Andric GV->hasExternalWeakLinkage()) 4220b57cec5SDimitry Andric return AArch64II::MO_GOT; 4230b57cec5SDimitry Andric 4248bcb0991SDimitry Andric // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate 4258bcb0991SDimitry Andric // that their nominal addresses are tagged and outside of the code model. In 4268bcb0991SDimitry Andric // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the 4278bcb0991SDimitry Andric // tag if necessary based on MO_TAGGED. 4288bcb0991SDimitry Andric if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType())) 4298bcb0991SDimitry Andric return AArch64II::MO_NC | AArch64II::MO_TAGGED; 4308bcb0991SDimitry Andric 4310b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric 4348bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference( 4350b57cec5SDimitry Andric const GlobalValue *GV, const TargetMachine &TM) const { 4360b57cec5SDimitry Andric // MachO large model always goes via a GOT, because we don't have the 4370b57cec5SDimitry Andric // relocations available to do anything else.. 4380b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && 4390b57cec5SDimitry Andric !GV->hasInternalLinkage()) 4400b57cec5SDimitry Andric return AArch64II::MO_GOT; 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric // NonLazyBind goes via GOT unless we know it's available locally. 4430b57cec5SDimitry Andric auto *F = dyn_cast<Function>(GV); 4440fca6ea1SDimitry Andric if ((!isTargetMachO() || MachOUseNonLazyBind) && F && 4450fca6ea1SDimitry Andric F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV)) 4460b57cec5SDimitry Andric return AArch64II::MO_GOT; 4470b57cec5SDimitry Andric 448bdd1243dSDimitry Andric if (getTargetTriple().isOSWindows()) { 4497a6dacacSDimitry Andric if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) { 4507a6dacacSDimitry Andric if (GV->hasDLLImportStorageClass()) { 4517a6dacacSDimitry Andric // On Arm64EC, if we're calling a symbol from the import table 4527a6dacacSDimitry Andric // directly, use MO_ARM64EC_CALLMANGLE. 4537a6dacacSDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT | 4547a6dacacSDimitry Andric AArch64II::MO_ARM64EC_CALLMANGLE; 4557a6dacacSDimitry Andric } 4567a6dacacSDimitry Andric if (GV->hasExternalLinkage()) { 4577a6dacacSDimitry Andric // If we're calling a symbol directly, use the mangled form in the 4587a6dacacSDimitry Andric // call instruction. 4597a6dacacSDimitry Andric return AArch64II::MO_ARM64EC_CALLMANGLE; 4607a6dacacSDimitry Andric } 461bdd1243dSDimitry Andric } 462bdd1243dSDimitry Andric 463480093f4SDimitry Andric // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB. 464480093f4SDimitry Andric return ClassifyGlobalReference(GV, TM); 465bdd1243dSDimitry Andric } 466480093f4SDimitry Andric 4670b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 4710b57cec5SDimitry Andric unsigned NumRegionInstrs) const { 4720b57cec5SDimitry Andric // LNT run (at least on Cyclone) showed reasonably significant gains for 4730b57cec5SDimitry Andric // bi-directional scheduling. 253.perlbmk. 4740b57cec5SDimitry Andric Policy.OnlyTopDown = false; 4750b57cec5SDimitry Andric Policy.OnlyBottomUp = false; 4760b57cec5SDimitry Andric // Enabling or Disabling the latency heuristic is a close call: It seems to 4770b57cec5SDimitry Andric // help nearly no benchmark on out-of-order architectures, on the other hand 4780b57cec5SDimitry Andric // it regresses register pressure on a few benchmarking. 4790b57cec5SDimitry Andric Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic; 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820fca6ea1SDimitry Andric void AArch64Subtarget::adjustSchedDependency( 4830fca6ea1SDimitry Andric SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, 4840fca6ea1SDimitry Andric const TargetSchedModel *SchedModel) const { 4850fca6ea1SDimitry Andric if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() || 4860fca6ea1SDimitry Andric !Def->isInstr() || !Use->isInstr() || 4870fca6ea1SDimitry Andric (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE && 4880fca6ea1SDimitry Andric Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE)) 4890fca6ea1SDimitry Andric return; 4900fca6ea1SDimitry Andric 4910fca6ea1SDimitry Andric // If the Def is a BUNDLE, find the last instruction in the bundle that defs 4920fca6ea1SDimitry Andric // the register. 4930fca6ea1SDimitry Andric const MachineInstr *DefMI = Def->getInstr(); 4940fca6ea1SDimitry Andric if (DefMI->getOpcode() == TargetOpcode::BUNDLE) { 4950fca6ea1SDimitry Andric Register Reg = DefMI->getOperand(DefOpIdx).getReg(); 4960fca6ea1SDimitry Andric for (const auto &Op : const_mi_bundle_ops(*DefMI)) { 4970fca6ea1SDimitry Andric if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) { 4980fca6ea1SDimitry Andric DefMI = Op.getParent(); 4990fca6ea1SDimitry Andric DefOpIdx = Op.getOperandNo(); 5000fca6ea1SDimitry Andric } 5010fca6ea1SDimitry Andric } 5020fca6ea1SDimitry Andric } 5030fca6ea1SDimitry Andric 5040fca6ea1SDimitry Andric // If the Use is a BUNDLE, find the first instruction that uses the Reg. 5050fca6ea1SDimitry Andric const MachineInstr *UseMI = Use->getInstr(); 5060fca6ea1SDimitry Andric if (UseMI->getOpcode() == TargetOpcode::BUNDLE) { 5070fca6ea1SDimitry Andric Register Reg = UseMI->getOperand(UseOpIdx).getReg(); 5080fca6ea1SDimitry Andric for (const auto &Op : const_mi_bundle_ops(*UseMI)) { 5090fca6ea1SDimitry Andric if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) { 5100fca6ea1SDimitry Andric UseMI = Op.getParent(); 5110fca6ea1SDimitry Andric UseOpIdx = Op.getOperandNo(); 5120fca6ea1SDimitry Andric break; 5130fca6ea1SDimitry Andric } 5140fca6ea1SDimitry Andric } 5150fca6ea1SDimitry Andric } 5160fca6ea1SDimitry Andric 5170fca6ea1SDimitry Andric Dep.setLatency( 5180fca6ea1SDimitry Andric SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx)); 5190fca6ea1SDimitry Andric } 5200fca6ea1SDimitry Andric 5210b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const { 5220b57cec5SDimitry Andric return EnableEarlyIfConvert; 5230b57cec5SDimitry Andric } 5240b57cec5SDimitry Andric 5250b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const { 5260b57cec5SDimitry Andric if (!UseAddressTopByteIgnored) 5270b57cec5SDimitry Andric return false; 5280b57cec5SDimitry Andric 52981ad6265SDimitry Andric if (TargetTriple.isDriverKit()) 53081ad6265SDimitry Andric return true; 5310b57cec5SDimitry Andric if (TargetTriple.isiOS()) { 5320eae32dcSDimitry Andric return TargetTriple.getiOSVersion() >= VersionTuple(8); 5330b57cec5SDimitry Andric } 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric return false; 5360b57cec5SDimitry Andric } 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint> 5390b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const { 5408bcb0991SDimitry Andric return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr; 5410b57cec5SDimitry Andric } 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { 5440b57cec5SDimitry Andric // We usually compute max call frame size after ISel. Do the computation now 5450b57cec5SDimitry Andric // if the .mir file didn't specify it. Note that this will probably give you 5460b57cec5SDimitry Andric // bogus values after PEI has eliminated the callframe setup/destroy pseudo 5470b57cec5SDimitry Andric // instructions, specify explicitly if you need it to be correct. 5480b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 5490b57cec5SDimitry Andric if (!MFI.isMaxCallFrameSizeComputed()) 5500b57cec5SDimitry Andric MFI.computeMaxCallFrameSize(MF); 5510b57cec5SDimitry Andric } 5525ffd83dbSDimitry Andric 553fe6060f1SDimitry Andric bool AArch64Subtarget::useAA() const { return UseAA; } 554bdd1243dSDimitry Andric 5555f757f3fSDimitry Andric // If return address signing is enabled, tail calls are emitted as follows: 5565f757f3fSDimitry Andric // 5575f757f3fSDimitry Andric // ``` 5585f757f3fSDimitry Andric // <authenticate LR> 5595f757f3fSDimitry Andric // <check LR> 5605f757f3fSDimitry Andric // TCRETURN ; the callee may sign and spill the LR in its prologue 5615f757f3fSDimitry Andric // ``` 5625f757f3fSDimitry Andric // 5635f757f3fSDimitry Andric // LR may require explicit checking because if FEAT_FPAC is not implemented 5645f757f3fSDimitry Andric // and LR was tampered with, then `<authenticate LR>` will not generate an 5655f757f3fSDimitry Andric // exception on its own. Later, if the callee spills the signed LR value and 5665f757f3fSDimitry Andric // neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces 5675f757f3fSDimitry Andric // the higher bits of LR thus hiding the authentication failure. 568*36b606aeSDimitry Andric AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod( 569*36b606aeSDimitry Andric const MachineFunction &MF) const { 570*36b606aeSDimitry Andric // TODO: Check subtarget for the scheme. Present variant is a default for 571*36b606aeSDimitry Andric // pauthtest ABI. 572*36b606aeSDimitry Andric if (MF.getFunction().hasFnAttribute("ptrauth-returns") && 573*36b606aeSDimitry Andric MF.getFunction().hasFnAttribute("ptrauth-auth-traps")) 574*36b606aeSDimitry Andric return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI; 5755f757f3fSDimitry Andric if (AuthenticatedLRCheckMethod.getNumOccurrences()) 5765f757f3fSDimitry Andric return AuthenticatedLRCheckMethod; 5775f757f3fSDimitry Andric 5785f757f3fSDimitry Andric // At now, use None by default because checks may introduce an unexpected 5795f757f3fSDimitry Andric // performance regression or incompatibility with execute-only mappings. 5805f757f3fSDimitry Andric return AArch64PAuth::AuthCheckMethod::None; 581bdd1243dSDimitry Andric } 5820fca6ea1SDimitry Andric 5830fca6ea1SDimitry Andric std::optional<uint16_t> 5840fca6ea1SDimitry Andric AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled( 5850fca6ea1SDimitry Andric const Function &ParentFn) const { 5860fca6ea1SDimitry Andric if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos")) 5870fca6ea1SDimitry Andric return std::nullopt; 5880fca6ea1SDimitry Andric // We currently have one simple mechanism for all targets. 5890fca6ea1SDimitry Andric // This isn't ABI, so we can always do better in the future. 5900fca6ea1SDimitry Andric return getPointerAuthStableSipHash( 5910fca6ea1SDimitry Andric (Twine(ParentFn.getName()) + " blockaddress").str()); 5920fca6ea1SDimitry Andric } 5930fca6ea1SDimitry Andric 5940fca6ea1SDimitry Andric bool AArch64Subtarget::enableMachinePipeliner() const { 5950fca6ea1SDimitry Andric return getSchedModel().hasInstrSchedModel(); 5960fca6ea1SDimitry Andric } 597