1*e8d8bef9SDimitry Andric //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// 2*e8d8bef9SDimitry Andric // 3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*e8d8bef9SDimitry Andric // 7*e8d8bef9SDimitry Andric //==-----------------------------------------------------------------------===// 8*e8d8bef9SDimitry Andric // 9*e8d8bef9SDimitry Andric /// \file 10*e8d8bef9SDimitry Andric /// AMD GCN specific subclass of TargetSubtarget. 11*e8d8bef9SDimitry Andric // 12*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 13*e8d8bef9SDimitry Andric 14*e8d8bef9SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 15*e8d8bef9SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 16*e8d8bef9SDimitry Andric 17*e8d8bef9SDimitry Andric #include "AMDGPUCallLowering.h" 18*e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 19*e8d8bef9SDimitry Andric #include "SIFrameLowering.h" 20*e8d8bef9SDimitry Andric #include "SIISelLowering.h" 21*e8d8bef9SDimitry Andric #include "SIInstrInfo.h" 22*e8d8bef9SDimitry Andric #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 23*e8d8bef9SDimitry Andric 24*e8d8bef9SDimitry Andric namespace llvm { 25*e8d8bef9SDimitry Andric 26*e8d8bef9SDimitry Andric class MCInst; 27*e8d8bef9SDimitry Andric class MCInstrInfo; 28*e8d8bef9SDimitry Andric 29*e8d8bef9SDimitry Andric } // namespace llvm 30*e8d8bef9SDimitry Andric 31*e8d8bef9SDimitry Andric #define GET_SUBTARGETINFO_HEADER 32*e8d8bef9SDimitry Andric #include "AMDGPUGenSubtargetInfo.inc" 33*e8d8bef9SDimitry Andric 34*e8d8bef9SDimitry Andric namespace llvm { 35*e8d8bef9SDimitry Andric 36*e8d8bef9SDimitry Andric class GCNTargetMachine; 37*e8d8bef9SDimitry Andric 38*e8d8bef9SDimitry Andric class GCNSubtarget final : public AMDGPUGenSubtargetInfo, 39*e8d8bef9SDimitry Andric public AMDGPUSubtarget { 40*e8d8bef9SDimitry Andric 41*e8d8bef9SDimitry Andric using AMDGPUSubtarget::getMaxWavesPerEU; 42*e8d8bef9SDimitry Andric 43*e8d8bef9SDimitry Andric public: 44*e8d8bef9SDimitry Andric enum TrapHandlerAbi { 45*e8d8bef9SDimitry Andric TrapHandlerAbiNone = 0, 46*e8d8bef9SDimitry Andric TrapHandlerAbiHsa = 1 47*e8d8bef9SDimitry Andric }; 48*e8d8bef9SDimitry Andric 49*e8d8bef9SDimitry Andric enum TrapID { 50*e8d8bef9SDimitry Andric TrapIDHardwareReserved = 0, 51*e8d8bef9SDimitry Andric TrapIDHSADebugTrap = 1, 52*e8d8bef9SDimitry Andric TrapIDLLVMTrap = 2, 53*e8d8bef9SDimitry Andric TrapIDLLVMDebugTrap = 3, 54*e8d8bef9SDimitry Andric TrapIDDebugBreakpoint = 7, 55*e8d8bef9SDimitry Andric TrapIDDebugReserved8 = 8, 56*e8d8bef9SDimitry Andric TrapIDDebugReservedFE = 0xfe, 57*e8d8bef9SDimitry Andric TrapIDDebugReservedFF = 0xff 58*e8d8bef9SDimitry Andric }; 59*e8d8bef9SDimitry Andric 60*e8d8bef9SDimitry Andric enum TrapRegValues { 61*e8d8bef9SDimitry Andric LLVMTrapHandlerRegValue = 1 62*e8d8bef9SDimitry Andric }; 63*e8d8bef9SDimitry Andric 64*e8d8bef9SDimitry Andric private: 65*e8d8bef9SDimitry Andric /// GlobalISel related APIs. 66*e8d8bef9SDimitry Andric std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 67*e8d8bef9SDimitry Andric std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 68*e8d8bef9SDimitry Andric std::unique_ptr<InstructionSelector> InstSelector; 69*e8d8bef9SDimitry Andric std::unique_ptr<LegalizerInfo> Legalizer; 70*e8d8bef9SDimitry Andric std::unique_ptr<RegisterBankInfo> RegBankInfo; 71*e8d8bef9SDimitry Andric 72*e8d8bef9SDimitry Andric protected: 73*e8d8bef9SDimitry Andric // Basic subtarget description. 74*e8d8bef9SDimitry Andric Triple TargetTriple; 75*e8d8bef9SDimitry Andric AMDGPU::IsaInfo::AMDGPUTargetID TargetID; 76*e8d8bef9SDimitry Andric unsigned Gen; 77*e8d8bef9SDimitry Andric InstrItineraryData InstrItins; 78*e8d8bef9SDimitry Andric int LDSBankCount; 79*e8d8bef9SDimitry Andric unsigned MaxPrivateElementSize; 80*e8d8bef9SDimitry Andric 81*e8d8bef9SDimitry Andric // Possibly statically set by tablegen, but may want to be overridden. 82*e8d8bef9SDimitry Andric bool FastFMAF32; 83*e8d8bef9SDimitry Andric bool FastDenormalF32; 84*e8d8bef9SDimitry Andric bool HalfRate64Ops; 85*e8d8bef9SDimitry Andric 86*e8d8bef9SDimitry Andric // Dynamically set bits that enable features. 87*e8d8bef9SDimitry Andric bool FlatForGlobal; 88*e8d8bef9SDimitry Andric bool AutoWaitcntBeforeBarrier; 89*e8d8bef9SDimitry Andric bool UnalignedScratchAccess; 90*e8d8bef9SDimitry Andric bool UnalignedAccessMode; 91*e8d8bef9SDimitry Andric bool HasApertureRegs; 92*e8d8bef9SDimitry Andric bool SupportsXNACK; 93*e8d8bef9SDimitry Andric 94*e8d8bef9SDimitry Andric // This should not be used directly. 'TargetID' tracks the dynamic settings 95*e8d8bef9SDimitry Andric // for XNACK. 96*e8d8bef9SDimitry Andric bool EnableXNACK; 97*e8d8bef9SDimitry Andric 98*e8d8bef9SDimitry Andric bool EnableCuMode; 99*e8d8bef9SDimitry Andric bool TrapHandler; 100*e8d8bef9SDimitry Andric 101*e8d8bef9SDimitry Andric // Used as options. 102*e8d8bef9SDimitry Andric bool EnableLoadStoreOpt; 103*e8d8bef9SDimitry Andric bool EnableUnsafeDSOffsetFolding; 104*e8d8bef9SDimitry Andric bool EnableSIScheduler; 105*e8d8bef9SDimitry Andric bool EnableDS128; 106*e8d8bef9SDimitry Andric bool EnablePRTStrictNull; 107*e8d8bef9SDimitry Andric bool DumpCode; 108*e8d8bef9SDimitry Andric 109*e8d8bef9SDimitry Andric // Subtarget statically properties set by tablegen 110*e8d8bef9SDimitry Andric bool FP64; 111*e8d8bef9SDimitry Andric bool FMA; 112*e8d8bef9SDimitry Andric bool MIMG_R128; 113*e8d8bef9SDimitry Andric bool GCN3Encoding; 114*e8d8bef9SDimitry Andric bool CIInsts; 115*e8d8bef9SDimitry Andric bool GFX8Insts; 116*e8d8bef9SDimitry Andric bool GFX9Insts; 117*e8d8bef9SDimitry Andric bool GFX10Insts; 118*e8d8bef9SDimitry Andric bool GFX10_3Insts; 119*e8d8bef9SDimitry Andric bool GFX7GFX8GFX9Insts; 120*e8d8bef9SDimitry Andric bool SGPRInitBug; 121*e8d8bef9SDimitry Andric bool HasSMemRealTime; 122*e8d8bef9SDimitry Andric bool HasIntClamp; 123*e8d8bef9SDimitry Andric bool HasFmaMixInsts; 124*e8d8bef9SDimitry Andric bool HasMovrel; 125*e8d8bef9SDimitry Andric bool HasVGPRIndexMode; 126*e8d8bef9SDimitry Andric bool HasScalarStores; 127*e8d8bef9SDimitry Andric bool HasScalarAtomics; 128*e8d8bef9SDimitry Andric bool HasSDWAOmod; 129*e8d8bef9SDimitry Andric bool HasSDWAScalar; 130*e8d8bef9SDimitry Andric bool HasSDWASdst; 131*e8d8bef9SDimitry Andric bool HasSDWAMac; 132*e8d8bef9SDimitry Andric bool HasSDWAOutModsVOPC; 133*e8d8bef9SDimitry Andric bool HasDPP; 134*e8d8bef9SDimitry Andric bool HasDPP8; 135*e8d8bef9SDimitry Andric bool HasR128A16; 136*e8d8bef9SDimitry Andric bool HasGFX10A16; 137*e8d8bef9SDimitry Andric bool HasG16; 138*e8d8bef9SDimitry Andric bool HasNSAEncoding; 139*e8d8bef9SDimitry Andric bool GFX10_BEncoding; 140*e8d8bef9SDimitry Andric bool HasDLInsts; 141*e8d8bef9SDimitry Andric bool HasDot1Insts; 142*e8d8bef9SDimitry Andric bool HasDot2Insts; 143*e8d8bef9SDimitry Andric bool HasDot3Insts; 144*e8d8bef9SDimitry Andric bool HasDot4Insts; 145*e8d8bef9SDimitry Andric bool HasDot5Insts; 146*e8d8bef9SDimitry Andric bool HasDot6Insts; 147*e8d8bef9SDimitry Andric bool HasMAIInsts; 148*e8d8bef9SDimitry Andric bool HasPkFmacF16Inst; 149*e8d8bef9SDimitry Andric bool HasAtomicFaddInsts; 150*e8d8bef9SDimitry Andric bool SupportsSRAMECC; 151*e8d8bef9SDimitry Andric 152*e8d8bef9SDimitry Andric // This should not be used directly. 'TargetID' tracks the dynamic settings 153*e8d8bef9SDimitry Andric // for SRAMECC. 154*e8d8bef9SDimitry Andric bool EnableSRAMECC; 155*e8d8bef9SDimitry Andric 156*e8d8bef9SDimitry Andric bool HasNoSdstCMPX; 157*e8d8bef9SDimitry Andric bool HasVscnt; 158*e8d8bef9SDimitry Andric bool HasGetWaveIdInst; 159*e8d8bef9SDimitry Andric bool HasSMemTimeInst; 160*e8d8bef9SDimitry Andric bool HasRegisterBanking; 161*e8d8bef9SDimitry Andric bool HasVOP3Literal; 162*e8d8bef9SDimitry Andric bool HasNoDataDepHazard; 163*e8d8bef9SDimitry Andric bool FlatAddressSpace; 164*e8d8bef9SDimitry Andric bool FlatInstOffsets; 165*e8d8bef9SDimitry Andric bool FlatGlobalInsts; 166*e8d8bef9SDimitry Andric bool FlatScratchInsts; 167*e8d8bef9SDimitry Andric bool ScalarFlatScratchInsts; 168*e8d8bef9SDimitry Andric bool AddNoCarryInsts; 169*e8d8bef9SDimitry Andric bool HasUnpackedD16VMem; 170*e8d8bef9SDimitry Andric bool LDSMisalignedBug; 171*e8d8bef9SDimitry Andric bool HasMFMAInlineLiteralBug; 172*e8d8bef9SDimitry Andric bool UnalignedBufferAccess; 173*e8d8bef9SDimitry Andric bool UnalignedDSAccess; 174*e8d8bef9SDimitry Andric bool ScalarizeGlobal; 175*e8d8bef9SDimitry Andric 176*e8d8bef9SDimitry Andric bool HasVcmpxPermlaneHazard; 177*e8d8bef9SDimitry Andric bool HasVMEMtoScalarWriteHazard; 178*e8d8bef9SDimitry Andric bool HasSMEMtoVectorWriteHazard; 179*e8d8bef9SDimitry Andric bool HasInstFwdPrefetchBug; 180*e8d8bef9SDimitry Andric bool HasVcmpxExecWARHazard; 181*e8d8bef9SDimitry Andric bool HasLdsBranchVmemWARHazard; 182*e8d8bef9SDimitry Andric bool HasNSAtoVMEMBug; 183*e8d8bef9SDimitry Andric bool HasOffset3fBug; 184*e8d8bef9SDimitry Andric bool HasFlatSegmentOffsetBug; 185*e8d8bef9SDimitry Andric bool HasImageStoreD16Bug; 186*e8d8bef9SDimitry Andric bool HasImageGather4D16Bug; 187*e8d8bef9SDimitry Andric 188*e8d8bef9SDimitry Andric // Dummy feature to use for assembler in tablegen. 189*e8d8bef9SDimitry Andric bool FeatureDisable; 190*e8d8bef9SDimitry Andric 191*e8d8bef9SDimitry Andric SelectionDAGTargetInfo TSInfo; 192*e8d8bef9SDimitry Andric private: 193*e8d8bef9SDimitry Andric SIInstrInfo InstrInfo; 194*e8d8bef9SDimitry Andric SITargetLowering TLInfo; 195*e8d8bef9SDimitry Andric SIFrameLowering FrameLowering; 196*e8d8bef9SDimitry Andric 197*e8d8bef9SDimitry Andric public: 198*e8d8bef9SDimitry Andric // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. 199*e8d8bef9SDimitry Andric static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); 200*e8d8bef9SDimitry Andric 201*e8d8bef9SDimitry Andric GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 202*e8d8bef9SDimitry Andric const GCNTargetMachine &TM); 203*e8d8bef9SDimitry Andric ~GCNSubtarget() override; 204*e8d8bef9SDimitry Andric 205*e8d8bef9SDimitry Andric GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, 206*e8d8bef9SDimitry Andric StringRef GPU, StringRef FS); 207*e8d8bef9SDimitry Andric 208*e8d8bef9SDimitry Andric const SIInstrInfo *getInstrInfo() const override { 209*e8d8bef9SDimitry Andric return &InstrInfo; 210*e8d8bef9SDimitry Andric } 211*e8d8bef9SDimitry Andric 212*e8d8bef9SDimitry Andric const SIFrameLowering *getFrameLowering() const override { 213*e8d8bef9SDimitry Andric return &FrameLowering; 214*e8d8bef9SDimitry Andric } 215*e8d8bef9SDimitry Andric 216*e8d8bef9SDimitry Andric const SITargetLowering *getTargetLowering() const override { 217*e8d8bef9SDimitry Andric return &TLInfo; 218*e8d8bef9SDimitry Andric } 219*e8d8bef9SDimitry Andric 220*e8d8bef9SDimitry Andric const SIRegisterInfo *getRegisterInfo() const override { 221*e8d8bef9SDimitry Andric return &InstrInfo.getRegisterInfo(); 222*e8d8bef9SDimitry Andric } 223*e8d8bef9SDimitry Andric 224*e8d8bef9SDimitry Andric const CallLowering *getCallLowering() const override { 225*e8d8bef9SDimitry Andric return CallLoweringInfo.get(); 226*e8d8bef9SDimitry Andric } 227*e8d8bef9SDimitry Andric 228*e8d8bef9SDimitry Andric const InlineAsmLowering *getInlineAsmLowering() const override { 229*e8d8bef9SDimitry Andric return InlineAsmLoweringInfo.get(); 230*e8d8bef9SDimitry Andric } 231*e8d8bef9SDimitry Andric 232*e8d8bef9SDimitry Andric InstructionSelector *getInstructionSelector() const override { 233*e8d8bef9SDimitry Andric return InstSelector.get(); 234*e8d8bef9SDimitry Andric } 235*e8d8bef9SDimitry Andric 236*e8d8bef9SDimitry Andric const LegalizerInfo *getLegalizerInfo() const override { 237*e8d8bef9SDimitry Andric return Legalizer.get(); 238*e8d8bef9SDimitry Andric } 239*e8d8bef9SDimitry Andric 240*e8d8bef9SDimitry Andric const RegisterBankInfo *getRegBankInfo() const override { 241*e8d8bef9SDimitry Andric return RegBankInfo.get(); 242*e8d8bef9SDimitry Andric } 243*e8d8bef9SDimitry Andric 244*e8d8bef9SDimitry Andric // Nothing implemented, just prevent crashes on use. 245*e8d8bef9SDimitry Andric const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { 246*e8d8bef9SDimitry Andric return &TSInfo; 247*e8d8bef9SDimitry Andric } 248*e8d8bef9SDimitry Andric 249*e8d8bef9SDimitry Andric const InstrItineraryData *getInstrItineraryData() const override { 250*e8d8bef9SDimitry Andric return &InstrItins; 251*e8d8bef9SDimitry Andric } 252*e8d8bef9SDimitry Andric 253*e8d8bef9SDimitry Andric void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 254*e8d8bef9SDimitry Andric 255*e8d8bef9SDimitry Andric Generation getGeneration() const { 256*e8d8bef9SDimitry Andric return (Generation)Gen; 257*e8d8bef9SDimitry Andric } 258*e8d8bef9SDimitry Andric 259*e8d8bef9SDimitry Andric /// Return the number of high bits known to be zero fror a frame index. 260*e8d8bef9SDimitry Andric unsigned getKnownHighZeroBitsForFrameIndex() const { 261*e8d8bef9SDimitry Andric return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); 262*e8d8bef9SDimitry Andric } 263*e8d8bef9SDimitry Andric 264*e8d8bef9SDimitry Andric int getLDSBankCount() const { 265*e8d8bef9SDimitry Andric return LDSBankCount; 266*e8d8bef9SDimitry Andric } 267*e8d8bef9SDimitry Andric 268*e8d8bef9SDimitry Andric unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { 269*e8d8bef9SDimitry Andric return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; 270*e8d8bef9SDimitry Andric } 271*e8d8bef9SDimitry Andric 272*e8d8bef9SDimitry Andric unsigned getConstantBusLimit(unsigned Opcode) const; 273*e8d8bef9SDimitry Andric 274*e8d8bef9SDimitry Andric bool hasIntClamp() const { 275*e8d8bef9SDimitry Andric return HasIntClamp; 276*e8d8bef9SDimitry Andric } 277*e8d8bef9SDimitry Andric 278*e8d8bef9SDimitry Andric bool hasFP64() const { 279*e8d8bef9SDimitry Andric return FP64; 280*e8d8bef9SDimitry Andric } 281*e8d8bef9SDimitry Andric 282*e8d8bef9SDimitry Andric bool hasMIMG_R128() const { 283*e8d8bef9SDimitry Andric return MIMG_R128; 284*e8d8bef9SDimitry Andric } 285*e8d8bef9SDimitry Andric 286*e8d8bef9SDimitry Andric bool hasHWFP64() const { 287*e8d8bef9SDimitry Andric return FP64; 288*e8d8bef9SDimitry Andric } 289*e8d8bef9SDimitry Andric 290*e8d8bef9SDimitry Andric bool hasFastFMAF32() const { 291*e8d8bef9SDimitry Andric return FastFMAF32; 292*e8d8bef9SDimitry Andric } 293*e8d8bef9SDimitry Andric 294*e8d8bef9SDimitry Andric bool hasHalfRate64Ops() const { 295*e8d8bef9SDimitry Andric return HalfRate64Ops; 296*e8d8bef9SDimitry Andric } 297*e8d8bef9SDimitry Andric 298*e8d8bef9SDimitry Andric bool hasAddr64() const { 299*e8d8bef9SDimitry Andric return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); 300*e8d8bef9SDimitry Andric } 301*e8d8bef9SDimitry Andric 302*e8d8bef9SDimitry Andric bool hasFlat() const { 303*e8d8bef9SDimitry Andric return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); 304*e8d8bef9SDimitry Andric } 305*e8d8bef9SDimitry Andric 306*e8d8bef9SDimitry Andric // Return true if the target only has the reverse operand versions of VALU 307*e8d8bef9SDimitry Andric // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). 308*e8d8bef9SDimitry Andric bool hasOnlyRevVALUShifts() const { 309*e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 310*e8d8bef9SDimitry Andric } 311*e8d8bef9SDimitry Andric 312*e8d8bef9SDimitry Andric bool hasFractBug() const { 313*e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 314*e8d8bef9SDimitry Andric } 315*e8d8bef9SDimitry Andric 316*e8d8bef9SDimitry Andric bool hasBFE() const { 317*e8d8bef9SDimitry Andric return true; 318*e8d8bef9SDimitry Andric } 319*e8d8bef9SDimitry Andric 320*e8d8bef9SDimitry Andric bool hasBFI() const { 321*e8d8bef9SDimitry Andric return true; 322*e8d8bef9SDimitry Andric } 323*e8d8bef9SDimitry Andric 324*e8d8bef9SDimitry Andric bool hasBFM() const { 325*e8d8bef9SDimitry Andric return hasBFE(); 326*e8d8bef9SDimitry Andric } 327*e8d8bef9SDimitry Andric 328*e8d8bef9SDimitry Andric bool hasBCNT(unsigned Size) const { 329*e8d8bef9SDimitry Andric return true; 330*e8d8bef9SDimitry Andric } 331*e8d8bef9SDimitry Andric 332*e8d8bef9SDimitry Andric bool hasFFBL() const { 333*e8d8bef9SDimitry Andric return true; 334*e8d8bef9SDimitry Andric } 335*e8d8bef9SDimitry Andric 336*e8d8bef9SDimitry Andric bool hasFFBH() const { 337*e8d8bef9SDimitry Andric return true; 338*e8d8bef9SDimitry Andric } 339*e8d8bef9SDimitry Andric 340*e8d8bef9SDimitry Andric bool hasMed3_16() const { 341*e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 342*e8d8bef9SDimitry Andric } 343*e8d8bef9SDimitry Andric 344*e8d8bef9SDimitry Andric bool hasMin3Max3_16() const { 345*e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 346*e8d8bef9SDimitry Andric } 347*e8d8bef9SDimitry Andric 348*e8d8bef9SDimitry Andric bool hasFmaMixInsts() const { 349*e8d8bef9SDimitry Andric return HasFmaMixInsts; 350*e8d8bef9SDimitry Andric } 351*e8d8bef9SDimitry Andric 352*e8d8bef9SDimitry Andric bool hasCARRY() const { 353*e8d8bef9SDimitry Andric return true; 354*e8d8bef9SDimitry Andric } 355*e8d8bef9SDimitry Andric 356*e8d8bef9SDimitry Andric bool hasFMA() const { 357*e8d8bef9SDimitry Andric return FMA; 358*e8d8bef9SDimitry Andric } 359*e8d8bef9SDimitry Andric 360*e8d8bef9SDimitry Andric bool hasSwap() const { 361*e8d8bef9SDimitry Andric return GFX9Insts; 362*e8d8bef9SDimitry Andric } 363*e8d8bef9SDimitry Andric 364*e8d8bef9SDimitry Andric bool hasScalarPackInsts() const { 365*e8d8bef9SDimitry Andric return GFX9Insts; 366*e8d8bef9SDimitry Andric } 367*e8d8bef9SDimitry Andric 368*e8d8bef9SDimitry Andric bool hasScalarMulHiInsts() const { 369*e8d8bef9SDimitry Andric return GFX9Insts; 370*e8d8bef9SDimitry Andric } 371*e8d8bef9SDimitry Andric 372*e8d8bef9SDimitry Andric TrapHandlerAbi getTrapHandlerAbi() const { 373*e8d8bef9SDimitry Andric return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; 374*e8d8bef9SDimitry Andric } 375*e8d8bef9SDimitry Andric 376*e8d8bef9SDimitry Andric /// True if the offset field of DS instructions works as expected. On SI, the 377*e8d8bef9SDimitry Andric /// offset uses a 16-bit adder and does not always wrap properly. 378*e8d8bef9SDimitry Andric bool hasUsableDSOffset() const { 379*e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 380*e8d8bef9SDimitry Andric } 381*e8d8bef9SDimitry Andric 382*e8d8bef9SDimitry Andric bool unsafeDSOffsetFoldingEnabled() const { 383*e8d8bef9SDimitry Andric return EnableUnsafeDSOffsetFolding; 384*e8d8bef9SDimitry Andric } 385*e8d8bef9SDimitry Andric 386*e8d8bef9SDimitry Andric /// Condition output from div_scale is usable. 387*e8d8bef9SDimitry Andric bool hasUsableDivScaleConditionOutput() const { 388*e8d8bef9SDimitry Andric return getGeneration() != SOUTHERN_ISLANDS; 389*e8d8bef9SDimitry Andric } 390*e8d8bef9SDimitry Andric 391*e8d8bef9SDimitry Andric /// Extra wait hazard is needed in some cases before 392*e8d8bef9SDimitry Andric /// s_cbranch_vccnz/s_cbranch_vccz. 393*e8d8bef9SDimitry Andric bool hasReadVCCZBug() const { 394*e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS; 395*e8d8bef9SDimitry Andric } 396*e8d8bef9SDimitry Andric 397*e8d8bef9SDimitry Andric /// Writes to VCC_LO/VCC_HI update the VCCZ flag. 398*e8d8bef9SDimitry Andric bool partialVCCWritesUpdateVCCZ() const { 399*e8d8bef9SDimitry Andric return getGeneration() >= GFX10; 400*e8d8bef9SDimitry Andric } 401*e8d8bef9SDimitry Andric 402*e8d8bef9SDimitry Andric /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR 403*e8d8bef9SDimitry Andric /// was written by a VALU instruction. 404*e8d8bef9SDimitry Andric bool hasSMRDReadVALUDefHazard() const { 405*e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 406*e8d8bef9SDimitry Andric } 407*e8d8bef9SDimitry Andric 408*e8d8bef9SDimitry Andric /// A read of an SGPR by a VMEM instruction requires 5 wait states when the 409*e8d8bef9SDimitry Andric /// SGPR was written by a VALU Instruction. 410*e8d8bef9SDimitry Andric bool hasVMEMReadSGPRVALUDefHazard() const { 411*e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 412*e8d8bef9SDimitry Andric } 413*e8d8bef9SDimitry Andric 414*e8d8bef9SDimitry Andric bool hasRFEHazards() const { 415*e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 416*e8d8bef9SDimitry Andric } 417*e8d8bef9SDimitry Andric 418*e8d8bef9SDimitry Andric /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. 419*e8d8bef9SDimitry Andric unsigned getSetRegWaitStates() const { 420*e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS ? 1 : 2; 421*e8d8bef9SDimitry Andric } 422*e8d8bef9SDimitry Andric 423*e8d8bef9SDimitry Andric bool dumpCode() const { 424*e8d8bef9SDimitry Andric return DumpCode; 425*e8d8bef9SDimitry Andric } 426*e8d8bef9SDimitry Andric 427*e8d8bef9SDimitry Andric /// Return the amount of LDS that can be used that will not restrict the 428*e8d8bef9SDimitry Andric /// occupancy lower than WaveCount. 429*e8d8bef9SDimitry Andric unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 430*e8d8bef9SDimitry Andric const Function &) const; 431*e8d8bef9SDimitry Andric 432*e8d8bef9SDimitry Andric bool supportsMinMaxDenormModes() const { 433*e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 434*e8d8bef9SDimitry Andric } 435*e8d8bef9SDimitry Andric 436*e8d8bef9SDimitry Andric /// \returns If target supports S_DENORM_MODE. 437*e8d8bef9SDimitry Andric bool hasDenormModeInst() const { 438*e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX10; 439*e8d8bef9SDimitry Andric } 440*e8d8bef9SDimitry Andric 441*e8d8bef9SDimitry Andric bool useFlatForGlobal() const { 442*e8d8bef9SDimitry Andric return FlatForGlobal; 443*e8d8bef9SDimitry Andric } 444*e8d8bef9SDimitry Andric 445*e8d8bef9SDimitry Andric /// \returns If target supports ds_read/write_b128 and user enables generation 446*e8d8bef9SDimitry Andric /// of ds_read/write_b128. 447*e8d8bef9SDimitry Andric bool useDS128() const { 448*e8d8bef9SDimitry Andric return CIInsts && EnableDS128; 449*e8d8bef9SDimitry Andric } 450*e8d8bef9SDimitry Andric 451*e8d8bef9SDimitry Andric /// \return If target supports ds_read/write_b96/128. 452*e8d8bef9SDimitry Andric bool hasDS96AndDS128() const { 453*e8d8bef9SDimitry Andric return CIInsts; 454*e8d8bef9SDimitry Andric } 455*e8d8bef9SDimitry Andric 456*e8d8bef9SDimitry Andric /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 457*e8d8bef9SDimitry Andric bool haveRoundOpsF64() const { 458*e8d8bef9SDimitry Andric return CIInsts; 459*e8d8bef9SDimitry Andric } 460*e8d8bef9SDimitry Andric 461*e8d8bef9SDimitry Andric /// \returns If MUBUF instructions always perform range checking, even for 462*e8d8bef9SDimitry Andric /// buffer resources used for private memory access. 463*e8d8bef9SDimitry Andric bool privateMemoryResourceIsRangeChecked() const { 464*e8d8bef9SDimitry Andric return getGeneration() < AMDGPUSubtarget::GFX9; 465*e8d8bef9SDimitry Andric } 466*e8d8bef9SDimitry Andric 467*e8d8bef9SDimitry Andric /// \returns If target requires PRT Struct NULL support (zero result registers 468*e8d8bef9SDimitry Andric /// for sparse texture support). 469*e8d8bef9SDimitry Andric bool usePRTStrictNull() const { 470*e8d8bef9SDimitry Andric return EnablePRTStrictNull; 471*e8d8bef9SDimitry Andric } 472*e8d8bef9SDimitry Andric 473*e8d8bef9SDimitry Andric bool hasAutoWaitcntBeforeBarrier() const { 474*e8d8bef9SDimitry Andric return AutoWaitcntBeforeBarrier; 475*e8d8bef9SDimitry Andric } 476*e8d8bef9SDimitry Andric 477*e8d8bef9SDimitry Andric bool hasUnalignedBufferAccess() const { 478*e8d8bef9SDimitry Andric return UnalignedBufferAccess; 479*e8d8bef9SDimitry Andric } 480*e8d8bef9SDimitry Andric 481*e8d8bef9SDimitry Andric bool hasUnalignedBufferAccessEnabled() const { 482*e8d8bef9SDimitry Andric return UnalignedBufferAccess && UnalignedAccessMode; 483*e8d8bef9SDimitry Andric } 484*e8d8bef9SDimitry Andric 485*e8d8bef9SDimitry Andric bool hasUnalignedDSAccess() const { 486*e8d8bef9SDimitry Andric return UnalignedDSAccess; 487*e8d8bef9SDimitry Andric } 488*e8d8bef9SDimitry Andric 489*e8d8bef9SDimitry Andric bool hasUnalignedDSAccessEnabled() const { 490*e8d8bef9SDimitry Andric return UnalignedDSAccess && UnalignedAccessMode; 491*e8d8bef9SDimitry Andric } 492*e8d8bef9SDimitry Andric 493*e8d8bef9SDimitry Andric bool hasUnalignedScratchAccess() const { 494*e8d8bef9SDimitry Andric return UnalignedScratchAccess; 495*e8d8bef9SDimitry Andric } 496*e8d8bef9SDimitry Andric 497*e8d8bef9SDimitry Andric bool hasUnalignedAccessMode() const { 498*e8d8bef9SDimitry Andric return UnalignedAccessMode; 499*e8d8bef9SDimitry Andric } 500*e8d8bef9SDimitry Andric 501*e8d8bef9SDimitry Andric bool hasApertureRegs() const { 502*e8d8bef9SDimitry Andric return HasApertureRegs; 503*e8d8bef9SDimitry Andric } 504*e8d8bef9SDimitry Andric 505*e8d8bef9SDimitry Andric bool isTrapHandlerEnabled() const { 506*e8d8bef9SDimitry Andric return TrapHandler; 507*e8d8bef9SDimitry Andric } 508*e8d8bef9SDimitry Andric 509*e8d8bef9SDimitry Andric bool isXNACKEnabled() const { 510*e8d8bef9SDimitry Andric return TargetID.isXnackOnOrAny(); 511*e8d8bef9SDimitry Andric } 512*e8d8bef9SDimitry Andric 513*e8d8bef9SDimitry Andric bool isCuModeEnabled() const { 514*e8d8bef9SDimitry Andric return EnableCuMode; 515*e8d8bef9SDimitry Andric } 516*e8d8bef9SDimitry Andric 517*e8d8bef9SDimitry Andric bool hasFlatAddressSpace() const { 518*e8d8bef9SDimitry Andric return FlatAddressSpace; 519*e8d8bef9SDimitry Andric } 520*e8d8bef9SDimitry Andric 521*e8d8bef9SDimitry Andric bool hasFlatScrRegister() const { 522*e8d8bef9SDimitry Andric return hasFlatAddressSpace(); 523*e8d8bef9SDimitry Andric } 524*e8d8bef9SDimitry Andric 525*e8d8bef9SDimitry Andric bool hasFlatInstOffsets() const { 526*e8d8bef9SDimitry Andric return FlatInstOffsets; 527*e8d8bef9SDimitry Andric } 528*e8d8bef9SDimitry Andric 529*e8d8bef9SDimitry Andric bool hasFlatGlobalInsts() const { 530*e8d8bef9SDimitry Andric return FlatGlobalInsts; 531*e8d8bef9SDimitry Andric } 532*e8d8bef9SDimitry Andric 533*e8d8bef9SDimitry Andric bool hasFlatScratchInsts() const { 534*e8d8bef9SDimitry Andric return FlatScratchInsts; 535*e8d8bef9SDimitry Andric } 536*e8d8bef9SDimitry Andric 537*e8d8bef9SDimitry Andric // Check if target supports ST addressing mode with FLAT scratch instructions. 538*e8d8bef9SDimitry Andric // The ST addressing mode means no registers are used, either VGPR or SGPR, 539*e8d8bef9SDimitry Andric // but only immediate offset is swizzled and added to the FLAT scratch base. 540*e8d8bef9SDimitry Andric bool hasFlatScratchSTMode() const { 541*e8d8bef9SDimitry Andric return hasFlatScratchInsts() && hasGFX10_3Insts(); 542*e8d8bef9SDimitry Andric } 543*e8d8bef9SDimitry Andric 544*e8d8bef9SDimitry Andric bool hasScalarFlatScratchInsts() const { 545*e8d8bef9SDimitry Andric return ScalarFlatScratchInsts; 546*e8d8bef9SDimitry Andric } 547*e8d8bef9SDimitry Andric 548*e8d8bef9SDimitry Andric bool hasGlobalAddTidInsts() const { 549*e8d8bef9SDimitry Andric return GFX10_BEncoding; 550*e8d8bef9SDimitry Andric } 551*e8d8bef9SDimitry Andric 552*e8d8bef9SDimitry Andric bool hasAtomicCSub() const { 553*e8d8bef9SDimitry Andric return GFX10_BEncoding; 554*e8d8bef9SDimitry Andric } 555*e8d8bef9SDimitry Andric 556*e8d8bef9SDimitry Andric bool hasMultiDwordFlatScratchAddressing() const { 557*e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 558*e8d8bef9SDimitry Andric } 559*e8d8bef9SDimitry Andric 560*e8d8bef9SDimitry Andric bool hasFlatSegmentOffsetBug() const { 561*e8d8bef9SDimitry Andric return HasFlatSegmentOffsetBug; 562*e8d8bef9SDimitry Andric } 563*e8d8bef9SDimitry Andric 564*e8d8bef9SDimitry Andric bool hasFlatLgkmVMemCountInOrder() const { 565*e8d8bef9SDimitry Andric return getGeneration() > GFX9; 566*e8d8bef9SDimitry Andric } 567*e8d8bef9SDimitry Andric 568*e8d8bef9SDimitry Andric bool hasD16LoadStore() const { 569*e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 570*e8d8bef9SDimitry Andric } 571*e8d8bef9SDimitry Andric 572*e8d8bef9SDimitry Andric bool d16PreservesUnusedBits() const { 573*e8d8bef9SDimitry Andric return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); 574*e8d8bef9SDimitry Andric } 575*e8d8bef9SDimitry Andric 576*e8d8bef9SDimitry Andric bool hasD16Images() const { 577*e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 578*e8d8bef9SDimitry Andric } 579*e8d8bef9SDimitry Andric 580*e8d8bef9SDimitry Andric /// Return if most LDS instructions have an m0 use that require m0 to be 581*e8d8bef9SDimitry Andric /// iniitalized. 582*e8d8bef9SDimitry Andric bool ldsRequiresM0Init() const { 583*e8d8bef9SDimitry Andric return getGeneration() < GFX9; 584*e8d8bef9SDimitry Andric } 585*e8d8bef9SDimitry Andric 586*e8d8bef9SDimitry Andric // True if the hardware rewinds and replays GWS operations if a wave is 587*e8d8bef9SDimitry Andric // preempted. 588*e8d8bef9SDimitry Andric // 589*e8d8bef9SDimitry Andric // If this is false, a GWS operation requires testing if a nack set the 590*e8d8bef9SDimitry Andric // MEM_VIOL bit, and repeating if so. 591*e8d8bef9SDimitry Andric bool hasGWSAutoReplay() const { 592*e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 593*e8d8bef9SDimitry Andric } 594*e8d8bef9SDimitry Andric 595*e8d8bef9SDimitry Andric /// \returns if target has ds_gws_sema_release_all instruction. 596*e8d8bef9SDimitry Andric bool hasGWSSemaReleaseAll() const { 597*e8d8bef9SDimitry Andric return CIInsts; 598*e8d8bef9SDimitry Andric } 599*e8d8bef9SDimitry Andric 600*e8d8bef9SDimitry Andric /// \returns true if the target has integer add/sub instructions that do not 601*e8d8bef9SDimitry Andric /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, 602*e8d8bef9SDimitry Andric /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier 603*e8d8bef9SDimitry Andric /// for saturation. 604*e8d8bef9SDimitry Andric bool hasAddNoCarry() const { 605*e8d8bef9SDimitry Andric return AddNoCarryInsts; 606*e8d8bef9SDimitry Andric } 607*e8d8bef9SDimitry Andric 608*e8d8bef9SDimitry Andric bool hasUnpackedD16VMem() const { 609*e8d8bef9SDimitry Andric return HasUnpackedD16VMem; 610*e8d8bef9SDimitry Andric } 611*e8d8bef9SDimitry Andric 612*e8d8bef9SDimitry Andric // Covers VS/PS/CS graphics shaders 613*e8d8bef9SDimitry Andric bool isMesaGfxShader(const Function &F) const { 614*e8d8bef9SDimitry Andric return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); 615*e8d8bef9SDimitry Andric } 616*e8d8bef9SDimitry Andric 617*e8d8bef9SDimitry Andric bool hasMad64_32() const { 618*e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 619*e8d8bef9SDimitry Andric } 620*e8d8bef9SDimitry Andric 621*e8d8bef9SDimitry Andric bool hasSDWAOmod() const { 622*e8d8bef9SDimitry Andric return HasSDWAOmod; 623*e8d8bef9SDimitry Andric } 624*e8d8bef9SDimitry Andric 625*e8d8bef9SDimitry Andric bool hasSDWAScalar() const { 626*e8d8bef9SDimitry Andric return HasSDWAScalar; 627*e8d8bef9SDimitry Andric } 628*e8d8bef9SDimitry Andric 629*e8d8bef9SDimitry Andric bool hasSDWASdst() const { 630*e8d8bef9SDimitry Andric return HasSDWASdst; 631*e8d8bef9SDimitry Andric } 632*e8d8bef9SDimitry Andric 633*e8d8bef9SDimitry Andric bool hasSDWAMac() const { 634*e8d8bef9SDimitry Andric return HasSDWAMac; 635*e8d8bef9SDimitry Andric } 636*e8d8bef9SDimitry Andric 637*e8d8bef9SDimitry Andric bool hasSDWAOutModsVOPC() const { 638*e8d8bef9SDimitry Andric return HasSDWAOutModsVOPC; 639*e8d8bef9SDimitry Andric } 640*e8d8bef9SDimitry Andric 641*e8d8bef9SDimitry Andric bool hasDLInsts() const { 642*e8d8bef9SDimitry Andric return HasDLInsts; 643*e8d8bef9SDimitry Andric } 644*e8d8bef9SDimitry Andric 645*e8d8bef9SDimitry Andric bool hasDot1Insts() const { 646*e8d8bef9SDimitry Andric return HasDot1Insts; 647*e8d8bef9SDimitry Andric } 648*e8d8bef9SDimitry Andric 649*e8d8bef9SDimitry Andric bool hasDot2Insts() const { 650*e8d8bef9SDimitry Andric return HasDot2Insts; 651*e8d8bef9SDimitry Andric } 652*e8d8bef9SDimitry Andric 653*e8d8bef9SDimitry Andric bool hasDot3Insts() const { 654*e8d8bef9SDimitry Andric return HasDot3Insts; 655*e8d8bef9SDimitry Andric } 656*e8d8bef9SDimitry Andric 657*e8d8bef9SDimitry Andric bool hasDot4Insts() const { 658*e8d8bef9SDimitry Andric return HasDot4Insts; 659*e8d8bef9SDimitry Andric } 660*e8d8bef9SDimitry Andric 661*e8d8bef9SDimitry Andric bool hasDot5Insts() const { 662*e8d8bef9SDimitry Andric return HasDot5Insts; 663*e8d8bef9SDimitry Andric } 664*e8d8bef9SDimitry Andric 665*e8d8bef9SDimitry Andric bool hasDot6Insts() const { 666*e8d8bef9SDimitry Andric return HasDot6Insts; 667*e8d8bef9SDimitry Andric } 668*e8d8bef9SDimitry Andric 669*e8d8bef9SDimitry Andric bool hasMAIInsts() const { 670*e8d8bef9SDimitry Andric return HasMAIInsts; 671*e8d8bef9SDimitry Andric } 672*e8d8bef9SDimitry Andric 673*e8d8bef9SDimitry Andric bool hasPkFmacF16Inst() const { 674*e8d8bef9SDimitry Andric return HasPkFmacF16Inst; 675*e8d8bef9SDimitry Andric } 676*e8d8bef9SDimitry Andric 677*e8d8bef9SDimitry Andric bool hasAtomicFaddInsts() const { 678*e8d8bef9SDimitry Andric return HasAtomicFaddInsts; 679*e8d8bef9SDimitry Andric } 680*e8d8bef9SDimitry Andric 681*e8d8bef9SDimitry Andric bool hasNoSdstCMPX() const { 682*e8d8bef9SDimitry Andric return HasNoSdstCMPX; 683*e8d8bef9SDimitry Andric } 684*e8d8bef9SDimitry Andric 685*e8d8bef9SDimitry Andric bool hasVscnt() const { 686*e8d8bef9SDimitry Andric return HasVscnt; 687*e8d8bef9SDimitry Andric } 688*e8d8bef9SDimitry Andric 689*e8d8bef9SDimitry Andric bool hasGetWaveIdInst() const { 690*e8d8bef9SDimitry Andric return HasGetWaveIdInst; 691*e8d8bef9SDimitry Andric } 692*e8d8bef9SDimitry Andric 693*e8d8bef9SDimitry Andric bool hasSMemTimeInst() const { 694*e8d8bef9SDimitry Andric return HasSMemTimeInst; 695*e8d8bef9SDimitry Andric } 696*e8d8bef9SDimitry Andric 697*e8d8bef9SDimitry Andric bool hasRegisterBanking() const { 698*e8d8bef9SDimitry Andric return HasRegisterBanking; 699*e8d8bef9SDimitry Andric } 700*e8d8bef9SDimitry Andric 701*e8d8bef9SDimitry Andric bool hasVOP3Literal() const { 702*e8d8bef9SDimitry Andric return HasVOP3Literal; 703*e8d8bef9SDimitry Andric } 704*e8d8bef9SDimitry Andric 705*e8d8bef9SDimitry Andric bool hasNoDataDepHazard() const { 706*e8d8bef9SDimitry Andric return HasNoDataDepHazard; 707*e8d8bef9SDimitry Andric } 708*e8d8bef9SDimitry Andric 709*e8d8bef9SDimitry Andric bool vmemWriteNeedsExpWaitcnt() const { 710*e8d8bef9SDimitry Andric return getGeneration() < SEA_ISLANDS; 711*e8d8bef9SDimitry Andric } 712*e8d8bef9SDimitry Andric 713*e8d8bef9SDimitry Andric // Scratch is allocated in 256 dword per wave blocks for the entire 714*e8d8bef9SDimitry Andric // wavefront. When viewed from the perspecive of an arbitrary workitem, this 715*e8d8bef9SDimitry Andric // is 4-byte aligned. 716*e8d8bef9SDimitry Andric // 717*e8d8bef9SDimitry Andric // Only 4-byte alignment is really needed to access anything. Transformations 718*e8d8bef9SDimitry Andric // on the pointer value itself may rely on the alignment / known low bits of 719*e8d8bef9SDimitry Andric // the pointer. Set this to something above the minimum to avoid needing 720*e8d8bef9SDimitry Andric // dynamic realignment in common cases. 721*e8d8bef9SDimitry Andric Align getStackAlignment() const { return Align(16); } 722*e8d8bef9SDimitry Andric 723*e8d8bef9SDimitry Andric bool enableMachineScheduler() const override { 724*e8d8bef9SDimitry Andric return true; 725*e8d8bef9SDimitry Andric } 726*e8d8bef9SDimitry Andric 727*e8d8bef9SDimitry Andric bool useAA() const override; 728*e8d8bef9SDimitry Andric 729*e8d8bef9SDimitry Andric bool enableSubRegLiveness() const override { 730*e8d8bef9SDimitry Andric return true; 731*e8d8bef9SDimitry Andric } 732*e8d8bef9SDimitry Andric 733*e8d8bef9SDimitry Andric void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } 734*e8d8bef9SDimitry Andric bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } 735*e8d8bef9SDimitry Andric 736*e8d8bef9SDimitry Andric // static wrappers 737*e8d8bef9SDimitry Andric static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); 738*e8d8bef9SDimitry Andric 739*e8d8bef9SDimitry Andric // XXX - Why is this here if it isn't in the default pass set? 740*e8d8bef9SDimitry Andric bool enableEarlyIfConversion() const override { 741*e8d8bef9SDimitry Andric return true; 742*e8d8bef9SDimitry Andric } 743*e8d8bef9SDimitry Andric 744*e8d8bef9SDimitry Andric bool enableFlatScratch() const; 745*e8d8bef9SDimitry Andric 746*e8d8bef9SDimitry Andric void overrideSchedPolicy(MachineSchedPolicy &Policy, 747*e8d8bef9SDimitry Andric unsigned NumRegionInstrs) const override; 748*e8d8bef9SDimitry Andric 749*e8d8bef9SDimitry Andric unsigned getMaxNumUserSGPRs() const { 750*e8d8bef9SDimitry Andric return 16; 751*e8d8bef9SDimitry Andric } 752*e8d8bef9SDimitry Andric 753*e8d8bef9SDimitry Andric bool hasSMemRealTime() const { 754*e8d8bef9SDimitry Andric return HasSMemRealTime; 755*e8d8bef9SDimitry Andric } 756*e8d8bef9SDimitry Andric 757*e8d8bef9SDimitry Andric bool hasMovrel() const { 758*e8d8bef9SDimitry Andric return HasMovrel; 759*e8d8bef9SDimitry Andric } 760*e8d8bef9SDimitry Andric 761*e8d8bef9SDimitry Andric bool hasVGPRIndexMode() const { 762*e8d8bef9SDimitry Andric return HasVGPRIndexMode; 763*e8d8bef9SDimitry Andric } 764*e8d8bef9SDimitry Andric 765*e8d8bef9SDimitry Andric bool useVGPRIndexMode() const; 766*e8d8bef9SDimitry Andric 767*e8d8bef9SDimitry Andric bool hasScalarCompareEq64() const { 768*e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 769*e8d8bef9SDimitry Andric } 770*e8d8bef9SDimitry Andric 771*e8d8bef9SDimitry Andric bool hasScalarStores() const { 772*e8d8bef9SDimitry Andric return HasScalarStores; 773*e8d8bef9SDimitry Andric } 774*e8d8bef9SDimitry Andric 775*e8d8bef9SDimitry Andric bool hasScalarAtomics() const { 776*e8d8bef9SDimitry Andric return HasScalarAtomics; 777*e8d8bef9SDimitry Andric } 778*e8d8bef9SDimitry Andric 779*e8d8bef9SDimitry Andric bool hasLDSFPAtomics() const { 780*e8d8bef9SDimitry Andric return GFX8Insts; 781*e8d8bef9SDimitry Andric } 782*e8d8bef9SDimitry Andric 783*e8d8bef9SDimitry Andric bool hasDPP() const { 784*e8d8bef9SDimitry Andric return HasDPP; 785*e8d8bef9SDimitry Andric } 786*e8d8bef9SDimitry Andric 787*e8d8bef9SDimitry Andric bool hasDPPBroadcasts() const { 788*e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 789*e8d8bef9SDimitry Andric } 790*e8d8bef9SDimitry Andric 791*e8d8bef9SDimitry Andric bool hasDPPWavefrontShifts() const { 792*e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 793*e8d8bef9SDimitry Andric } 794*e8d8bef9SDimitry Andric 795*e8d8bef9SDimitry Andric bool hasDPP8() const { 796*e8d8bef9SDimitry Andric return HasDPP8; 797*e8d8bef9SDimitry Andric } 798*e8d8bef9SDimitry Andric 799*e8d8bef9SDimitry Andric bool hasR128A16() const { 800*e8d8bef9SDimitry Andric return HasR128A16; 801*e8d8bef9SDimitry Andric } 802*e8d8bef9SDimitry Andric 803*e8d8bef9SDimitry Andric bool hasGFX10A16() const { 804*e8d8bef9SDimitry Andric return HasGFX10A16; 805*e8d8bef9SDimitry Andric } 806*e8d8bef9SDimitry Andric 807*e8d8bef9SDimitry Andric bool hasA16() const { return hasR128A16() || hasGFX10A16(); } 808*e8d8bef9SDimitry Andric 809*e8d8bef9SDimitry Andric bool hasG16() const { return HasG16; } 810*e8d8bef9SDimitry Andric 811*e8d8bef9SDimitry Andric bool hasOffset3fBug() const { 812*e8d8bef9SDimitry Andric return HasOffset3fBug; 813*e8d8bef9SDimitry Andric } 814*e8d8bef9SDimitry Andric 815*e8d8bef9SDimitry Andric bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } 816*e8d8bef9SDimitry Andric 817*e8d8bef9SDimitry Andric bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } 818*e8d8bef9SDimitry Andric 819*e8d8bef9SDimitry Andric bool hasNSAEncoding() const { return HasNSAEncoding; } 820*e8d8bef9SDimitry Andric 821*e8d8bef9SDimitry Andric bool hasGFX10_BEncoding() const { 822*e8d8bef9SDimitry Andric return GFX10_BEncoding; 823*e8d8bef9SDimitry Andric } 824*e8d8bef9SDimitry Andric 825*e8d8bef9SDimitry Andric bool hasGFX10_3Insts() const { 826*e8d8bef9SDimitry Andric return GFX10_3Insts; 827*e8d8bef9SDimitry Andric } 828*e8d8bef9SDimitry Andric 829*e8d8bef9SDimitry Andric bool hasMadF16() const; 830*e8d8bef9SDimitry Andric 831*e8d8bef9SDimitry Andric bool enableSIScheduler() const { 832*e8d8bef9SDimitry Andric return EnableSIScheduler; 833*e8d8bef9SDimitry Andric } 834*e8d8bef9SDimitry Andric 835*e8d8bef9SDimitry Andric bool loadStoreOptEnabled() const { 836*e8d8bef9SDimitry Andric return EnableLoadStoreOpt; 837*e8d8bef9SDimitry Andric } 838*e8d8bef9SDimitry Andric 839*e8d8bef9SDimitry Andric bool hasSGPRInitBug() const { 840*e8d8bef9SDimitry Andric return SGPRInitBug; 841*e8d8bef9SDimitry Andric } 842*e8d8bef9SDimitry Andric 843*e8d8bef9SDimitry Andric bool hasMFMAInlineLiteralBug() const { 844*e8d8bef9SDimitry Andric return HasMFMAInlineLiteralBug; 845*e8d8bef9SDimitry Andric } 846*e8d8bef9SDimitry Andric 847*e8d8bef9SDimitry Andric bool has12DWordStoreHazard() const { 848*e8d8bef9SDimitry Andric return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; 849*e8d8bef9SDimitry Andric } 850*e8d8bef9SDimitry Andric 851*e8d8bef9SDimitry Andric // \returns true if the subtarget supports DWORDX3 load/store instructions. 852*e8d8bef9SDimitry Andric bool hasDwordx3LoadStores() const { 853*e8d8bef9SDimitry Andric return CIInsts; 854*e8d8bef9SDimitry Andric } 855*e8d8bef9SDimitry Andric 856*e8d8bef9SDimitry Andric bool hasReadM0MovRelInterpHazard() const { 857*e8d8bef9SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 858*e8d8bef9SDimitry Andric } 859*e8d8bef9SDimitry Andric 860*e8d8bef9SDimitry Andric bool hasReadM0SendMsgHazard() const { 861*e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 862*e8d8bef9SDimitry Andric getGeneration() <= AMDGPUSubtarget::GFX9; 863*e8d8bef9SDimitry Andric } 864*e8d8bef9SDimitry Andric 865*e8d8bef9SDimitry Andric bool hasVcmpxPermlaneHazard() const { 866*e8d8bef9SDimitry Andric return HasVcmpxPermlaneHazard; 867*e8d8bef9SDimitry Andric } 868*e8d8bef9SDimitry Andric 869*e8d8bef9SDimitry Andric bool hasVMEMtoScalarWriteHazard() const { 870*e8d8bef9SDimitry Andric return HasVMEMtoScalarWriteHazard; 871*e8d8bef9SDimitry Andric } 872*e8d8bef9SDimitry Andric 873*e8d8bef9SDimitry Andric bool hasSMEMtoVectorWriteHazard() const { 874*e8d8bef9SDimitry Andric return HasSMEMtoVectorWriteHazard; 875*e8d8bef9SDimitry Andric } 876*e8d8bef9SDimitry Andric 877*e8d8bef9SDimitry Andric bool hasLDSMisalignedBug() const { 878*e8d8bef9SDimitry Andric return LDSMisalignedBug && !EnableCuMode; 879*e8d8bef9SDimitry Andric } 880*e8d8bef9SDimitry Andric 881*e8d8bef9SDimitry Andric bool hasInstFwdPrefetchBug() const { 882*e8d8bef9SDimitry Andric return HasInstFwdPrefetchBug; 883*e8d8bef9SDimitry Andric } 884*e8d8bef9SDimitry Andric 885*e8d8bef9SDimitry Andric bool hasVcmpxExecWARHazard() const { 886*e8d8bef9SDimitry Andric return HasVcmpxExecWARHazard; 887*e8d8bef9SDimitry Andric } 888*e8d8bef9SDimitry Andric 889*e8d8bef9SDimitry Andric bool hasLdsBranchVmemWARHazard() const { 890*e8d8bef9SDimitry Andric return HasLdsBranchVmemWARHazard; 891*e8d8bef9SDimitry Andric } 892*e8d8bef9SDimitry Andric 893*e8d8bef9SDimitry Andric bool hasNSAtoVMEMBug() const { 894*e8d8bef9SDimitry Andric return HasNSAtoVMEMBug; 895*e8d8bef9SDimitry Andric } 896*e8d8bef9SDimitry Andric 897*e8d8bef9SDimitry Andric bool hasHardClauses() const { return getGeneration() >= GFX10; } 898*e8d8bef9SDimitry Andric 899*e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p SGPRs 900*e8d8bef9SDimitry Andric /// SGPRs 901*e8d8bef9SDimitry Andric unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; 902*e8d8bef9SDimitry Andric 903*e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p VGPRs 904*e8d8bef9SDimitry Andric /// VGPRs 905*e8d8bef9SDimitry Andric unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; 906*e8d8bef9SDimitry Andric 907*e8d8bef9SDimitry Andric /// Return occupancy for the given function. Used LDS and a number of 908*e8d8bef9SDimitry Andric /// registers if provided. 909*e8d8bef9SDimitry Andric /// Note, occupancy can be affected by the scratch allocation as well, but 910*e8d8bef9SDimitry Andric /// we do not have enough information to compute it. 911*e8d8bef9SDimitry Andric unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, 912*e8d8bef9SDimitry Andric unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; 913*e8d8bef9SDimitry Andric 914*e8d8bef9SDimitry Andric /// \returns true if the flat_scratch register should be initialized with the 915*e8d8bef9SDimitry Andric /// pointer to the wave's scratch memory rather than a size and offset. 916*e8d8bef9SDimitry Andric bool flatScratchIsPointer() const { 917*e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 918*e8d8bef9SDimitry Andric } 919*e8d8bef9SDimitry Andric 920*e8d8bef9SDimitry Andric /// \returns true if the machine has merged shaders in which s0-s7 are 921*e8d8bef9SDimitry Andric /// reserved by the hardware and user SGPRs start at s8 922*e8d8bef9SDimitry Andric bool hasMergedShaders() const { 923*e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 924*e8d8bef9SDimitry Andric } 925*e8d8bef9SDimitry Andric 926*e8d8bef9SDimitry Andric /// \returns SGPR allocation granularity supported by the subtarget. 927*e8d8bef9SDimitry Andric unsigned getSGPRAllocGranule() const { 928*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPRAllocGranule(this); 929*e8d8bef9SDimitry Andric } 930*e8d8bef9SDimitry Andric 931*e8d8bef9SDimitry Andric /// \returns SGPR encoding granularity supported by the subtarget. 932*e8d8bef9SDimitry Andric unsigned getSGPREncodingGranule() const { 933*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPREncodingGranule(this); 934*e8d8bef9SDimitry Andric } 935*e8d8bef9SDimitry Andric 936*e8d8bef9SDimitry Andric /// \returns Total number of SGPRs supported by the subtarget. 937*e8d8bef9SDimitry Andric unsigned getTotalNumSGPRs() const { 938*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumSGPRs(this); 939*e8d8bef9SDimitry Andric } 940*e8d8bef9SDimitry Andric 941*e8d8bef9SDimitry Andric /// \returns Addressable number of SGPRs supported by the subtarget. 942*e8d8bef9SDimitry Andric unsigned getAddressableNumSGPRs() const { 943*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); 944*e8d8bef9SDimitry Andric } 945*e8d8bef9SDimitry Andric 946*e8d8bef9SDimitry Andric /// \returns Minimum number of SGPRs that meets the given number of waves per 947*e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 948*e8d8bef9SDimitry Andric unsigned getMinNumSGPRs(unsigned WavesPerEU) const { 949*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); 950*e8d8bef9SDimitry Andric } 951*e8d8bef9SDimitry Andric 952*e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets the given number of waves per 953*e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 954*e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { 955*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); 956*e8d8bef9SDimitry Andric } 957*e8d8bef9SDimitry Andric 958*e8d8bef9SDimitry Andric /// \returns Reserved number of SGPRs for given function \p MF. 959*e8d8bef9SDimitry Andric unsigned getReservedNumSGPRs(const MachineFunction &MF) const; 960*e8d8bef9SDimitry Andric 961*e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets number of waves per execution 962*e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of SGPRs explicitly 963*e8d8bef9SDimitry Andric /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 964*e8d8bef9SDimitry Andric /// 965*e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 966*e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 967*e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 968*e8d8bef9SDimitry Andric /// unit requirement. 969*e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(const MachineFunction &MF) const; 970*e8d8bef9SDimitry Andric 971*e8d8bef9SDimitry Andric /// \returns VGPR allocation granularity supported by the subtarget. 972*e8d8bef9SDimitry Andric unsigned getVGPRAllocGranule() const { 973*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPRAllocGranule(this); 974*e8d8bef9SDimitry Andric } 975*e8d8bef9SDimitry Andric 976*e8d8bef9SDimitry Andric /// \returns VGPR encoding granularity supported by the subtarget. 977*e8d8bef9SDimitry Andric unsigned getVGPREncodingGranule() const { 978*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPREncodingGranule(this); 979*e8d8bef9SDimitry Andric } 980*e8d8bef9SDimitry Andric 981*e8d8bef9SDimitry Andric /// \returns Total number of VGPRs supported by the subtarget. 982*e8d8bef9SDimitry Andric unsigned getTotalNumVGPRs() const { 983*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumVGPRs(this); 984*e8d8bef9SDimitry Andric } 985*e8d8bef9SDimitry Andric 986*e8d8bef9SDimitry Andric /// \returns Addressable number of VGPRs supported by the subtarget. 987*e8d8bef9SDimitry Andric unsigned getAddressableNumVGPRs() const { 988*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); 989*e8d8bef9SDimitry Andric } 990*e8d8bef9SDimitry Andric 991*e8d8bef9SDimitry Andric /// \returns Minimum number of VGPRs that meets given number of waves per 992*e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 993*e8d8bef9SDimitry Andric unsigned getMinNumVGPRs(unsigned WavesPerEU) const { 994*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); 995*e8d8bef9SDimitry Andric } 996*e8d8bef9SDimitry Andric 997*e8d8bef9SDimitry Andric /// \returns Maximum number of VGPRs that meets given number of waves per 998*e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 999*e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { 1000*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); 1001*e8d8bef9SDimitry Andric } 1002*e8d8bef9SDimitry Andric 1003*e8d8bef9SDimitry Andric /// \returns Maximum number of VGPRs that meets number of waves per execution 1004*e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of VGPRs explicitly 1005*e8d8bef9SDimitry Andric /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 1006*e8d8bef9SDimitry Andric /// 1007*e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1008*e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1009*e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1010*e8d8bef9SDimitry Andric /// unit requirement. 1011*e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(const MachineFunction &MF) const; 1012*e8d8bef9SDimitry Andric 1013*e8d8bef9SDimitry Andric void getPostRAMutations( 1014*e8d8bef9SDimitry Andric std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) 1015*e8d8bef9SDimitry Andric const override; 1016*e8d8bef9SDimitry Andric 1017*e8d8bef9SDimitry Andric bool isWave32() const { 1018*e8d8bef9SDimitry Andric return getWavefrontSize() == 32; 1019*e8d8bef9SDimitry Andric } 1020*e8d8bef9SDimitry Andric 1021*e8d8bef9SDimitry Andric bool isWave64() const { 1022*e8d8bef9SDimitry Andric return getWavefrontSize() == 64; 1023*e8d8bef9SDimitry Andric } 1024*e8d8bef9SDimitry Andric 1025*e8d8bef9SDimitry Andric const TargetRegisterClass *getBoolRC() const { 1026*e8d8bef9SDimitry Andric return getRegisterInfo()->getBoolRC(); 1027*e8d8bef9SDimitry Andric } 1028*e8d8bef9SDimitry Andric 1029*e8d8bef9SDimitry Andric /// \returns Maximum number of work groups per compute unit supported by the 1030*e8d8bef9SDimitry Andric /// subtarget and limited by given \p FlatWorkGroupSize. 1031*e8d8bef9SDimitry Andric unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { 1032*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); 1033*e8d8bef9SDimitry Andric } 1034*e8d8bef9SDimitry Andric 1035*e8d8bef9SDimitry Andric /// \returns Minimum flat work group size supported by the subtarget. 1036*e8d8bef9SDimitry Andric unsigned getMinFlatWorkGroupSize() const override { 1037*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); 1038*e8d8bef9SDimitry Andric } 1039*e8d8bef9SDimitry Andric 1040*e8d8bef9SDimitry Andric /// \returns Maximum flat work group size supported by the subtarget. 1041*e8d8bef9SDimitry Andric unsigned getMaxFlatWorkGroupSize() const override { 1042*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); 1043*e8d8bef9SDimitry Andric } 1044*e8d8bef9SDimitry Andric 1045*e8d8bef9SDimitry Andric /// \returns Number of waves per execution unit required to support the given 1046*e8d8bef9SDimitry Andric /// \p FlatWorkGroupSize. 1047*e8d8bef9SDimitry Andric unsigned 1048*e8d8bef9SDimitry Andric getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { 1049*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); 1050*e8d8bef9SDimitry Andric } 1051*e8d8bef9SDimitry Andric 1052*e8d8bef9SDimitry Andric /// \returns Minimum number of waves per execution unit supported by the 1053*e8d8bef9SDimitry Andric /// subtarget. 1054*e8d8bef9SDimitry Andric unsigned getMinWavesPerEU() const override { 1055*e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinWavesPerEU(this); 1056*e8d8bef9SDimitry Andric } 1057*e8d8bef9SDimitry Andric 1058*e8d8bef9SDimitry Andric void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 1059*e8d8bef9SDimitry Andric SDep &Dep) const override; 1060*e8d8bef9SDimitry Andric }; 1061*e8d8bef9SDimitry Andric 1062*e8d8bef9SDimitry Andric } // end namespace llvm 1063*e8d8bef9SDimitry Andric 1064*e8d8bef9SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1065