//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
14f4a2713aSLionel Sambuc
15f4a2713aSLionel Sambuc #include "AMDGPUSubtarget.h"
16*0a6a1f1dSLionel Sambuc #include "R600ISelLowering.h"
17*0a6a1f1dSLionel Sambuc #include "R600InstrInfo.h"
18*0a6a1f1dSLionel Sambuc #include "R600MachineScheduler.h"
19*0a6a1f1dSLionel Sambuc #include "SIISelLowering.h"
20*0a6a1f1dSLionel Sambuc #include "SIInstrInfo.h"
21*0a6a1f1dSLionel Sambuc #include "SIMachineFunctionInfo.h"
22*0a6a1f1dSLionel Sambuc #include "llvm/ADT/SmallString.h"
23*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineScheduler.h"
24f4a2713aSLionel Sambuc
25f4a2713aSLionel Sambuc using namespace llvm;
26f4a2713aSLionel Sambuc
27*0a6a1f1dSLionel Sambuc #define DEBUG_TYPE "amdgpu-subtarget"
28*0a6a1f1dSLionel Sambuc
29f4a2713aSLionel Sambuc #define GET_SUBTARGETINFO_ENUM
30f4a2713aSLionel Sambuc #define GET_SUBTARGETINFO_TARGET_DESC
31f4a2713aSLionel Sambuc #define GET_SUBTARGETINFO_CTOR
32f4a2713aSLionel Sambuc #include "AMDGPUGenSubtargetInfo.inc"
33f4a2713aSLionel Sambuc
computeDataLayout(const AMDGPUSubtarget & ST)34*0a6a1f1dSLionel Sambuc static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
35*0a6a1f1dSLionel Sambuc std::string Ret = "e-p:32:32";
36f4a2713aSLionel Sambuc
37*0a6a1f1dSLionel Sambuc if (ST.is64bit()) {
38*0a6a1f1dSLionel Sambuc // 32-bit private, local, and region pointers. 64-bit global and constant.
39*0a6a1f1dSLionel Sambuc Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
40f4a2713aSLionel Sambuc }
41f4a2713aSLionel Sambuc
42*0a6a1f1dSLionel Sambuc Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
43*0a6a1f1dSLionel Sambuc "-v512:512-v1024:1024-v2048:2048-n32:64";
44*0a6a1f1dSLionel Sambuc
45*0a6a1f1dSLionel Sambuc return Ret;
46f4a2713aSLionel Sambuc }
47*0a6a1f1dSLionel Sambuc
48*0a6a1f1dSLionel Sambuc AMDGPUSubtarget &
initializeSubtargetDependencies(StringRef GPU,StringRef FS)49*0a6a1f1dSLionel Sambuc AMDGPUSubtarget::initializeSubtargetDependencies(StringRef GPU, StringRef FS) {
50*0a6a1f1dSLionel Sambuc // Determine default and user-specified characteristics
51*0a6a1f1dSLionel Sambuc // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
52*0a6a1f1dSLionel Sambuc // enabled, but some instructions do not respect them and they run at the
53*0a6a1f1dSLionel Sambuc // double precision rate, so don't enable by default.
54*0a6a1f1dSLionel Sambuc //
55*0a6a1f1dSLionel Sambuc // We want to be able to turn these off, but making this a subtarget feature
56*0a6a1f1dSLionel Sambuc // for SI has the unhelpful behavior that it unsets everything else if you
57*0a6a1f1dSLionel Sambuc // disable it.
58*0a6a1f1dSLionel Sambuc
59*0a6a1f1dSLionel Sambuc SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
60*0a6a1f1dSLionel Sambuc FullFS += FS;
61*0a6a1f1dSLionel Sambuc
62*0a6a1f1dSLionel Sambuc ParseSubtargetFeatures(GPU, FullFS);
63*0a6a1f1dSLionel Sambuc
64*0a6a1f1dSLionel Sambuc // FIXME: I don't think think Evergreen has any useful support for
65*0a6a1f1dSLionel Sambuc // denormals, but should be checked. Should we issue a warning somewhere
66*0a6a1f1dSLionel Sambuc // if someone tries to enable these?
67*0a6a1f1dSLionel Sambuc if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
68*0a6a1f1dSLionel Sambuc FP32Denormals = false;
69*0a6a1f1dSLionel Sambuc FP64Denormals = false;
70f4a2713aSLionel Sambuc }
71*0a6a1f1dSLionel Sambuc return *this;
72f4a2713aSLionel Sambuc }
73*0a6a1f1dSLionel Sambuc
AMDGPUSubtarget(StringRef TT,StringRef GPU,StringRef FS,TargetMachine & TM)74*0a6a1f1dSLionel Sambuc AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
75*0a6a1f1dSLionel Sambuc TargetMachine &TM)
76*0a6a1f1dSLionel Sambuc : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
77*0a6a1f1dSLionel Sambuc DumpCode(false), R600ALUInst(false), HasVertexCache(false),
78*0a6a1f1dSLionel Sambuc TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
79*0a6a1f1dSLionel Sambuc FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
80*0a6a1f1dSLionel Sambuc FlatAddressSpace(false), EnableIRStructurizer(true),
81*0a6a1f1dSLionel Sambuc EnablePromoteAlloca(false), EnableIfCvt(true),
82*0a6a1f1dSLionel Sambuc EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
83*0a6a1f1dSLionel Sambuc EnableVGPRSpilling(false),SGPRInitBug(false),
84*0a6a1f1dSLionel Sambuc DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
85*0a6a1f1dSLionel Sambuc FrameLowering(TargetFrameLowering::StackGrowsUp,
86*0a6a1f1dSLionel Sambuc 64 * 16, // Maximum stack alignment (long16)
87*0a6a1f1dSLionel Sambuc 0),
88*0a6a1f1dSLionel Sambuc InstrItins(getInstrItineraryForCPU(GPU)),
89*0a6a1f1dSLionel Sambuc TargetTriple(TT) {
90*0a6a1f1dSLionel Sambuc if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
91*0a6a1f1dSLionel Sambuc InstrInfo.reset(new R600InstrInfo(*this));
92*0a6a1f1dSLionel Sambuc TLInfo.reset(new R600TargetLowering(TM));
93f4a2713aSLionel Sambuc } else {
94*0a6a1f1dSLionel Sambuc InstrInfo.reset(new SIInstrInfo(*this));
95*0a6a1f1dSLionel Sambuc TLInfo.reset(new SITargetLowering(TM));
96f4a2713aSLionel Sambuc }
97f4a2713aSLionel Sambuc }
98f4a2713aSLionel Sambuc
getStackEntrySize() const99*0a6a1f1dSLionel Sambuc unsigned AMDGPUSubtarget::getStackEntrySize() const {
100*0a6a1f1dSLionel Sambuc assert(getGeneration() <= NORTHERN_ISLANDS);
101*0a6a1f1dSLionel Sambuc switch(getWavefrontSize()) {
102*0a6a1f1dSLionel Sambuc case 16:
103*0a6a1f1dSLionel Sambuc return 8;
104*0a6a1f1dSLionel Sambuc case 32:
105*0a6a1f1dSLionel Sambuc return hasCaymanISA() ? 4 : 8;
106*0a6a1f1dSLionel Sambuc case 64:
107*0a6a1f1dSLionel Sambuc return 4;
108*0a6a1f1dSLionel Sambuc default:
109*0a6a1f1dSLionel Sambuc llvm_unreachable("Illegal wavefront size.");
110*0a6a1f1dSLionel Sambuc }
111f4a2713aSLionel Sambuc }
112f4a2713aSLionel Sambuc
getAmdKernelCodeChipID() const113*0a6a1f1dSLionel Sambuc unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
114*0a6a1f1dSLionel Sambuc switch(getGeneration()) {
115*0a6a1f1dSLionel Sambuc default: llvm_unreachable("ChipID unknown");
116*0a6a1f1dSLionel Sambuc case SEA_ISLANDS: return 12;
117*0a6a1f1dSLionel Sambuc }
118f4a2713aSLionel Sambuc }
119f4a2713aSLionel Sambuc
isVGPRSpillingEnabled(const SIMachineFunctionInfo * MFI) const120*0a6a1f1dSLionel Sambuc bool AMDGPUSubtarget::isVGPRSpillingEnabled(
121*0a6a1f1dSLionel Sambuc const SIMachineFunctionInfo *MFI) const {
122*0a6a1f1dSLionel Sambuc return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
123f4a2713aSLionel Sambuc }
124f4a2713aSLionel Sambuc
overrideSchedPolicy(MachineSchedPolicy & Policy,MachineInstr * begin,MachineInstr * end,unsigned NumRegionInstrs) const125*0a6a1f1dSLionel Sambuc void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
126*0a6a1f1dSLionel Sambuc MachineInstr *begin,
127*0a6a1f1dSLionel Sambuc MachineInstr *end,
128*0a6a1f1dSLionel Sambuc unsigned NumRegionInstrs) const {
129*0a6a1f1dSLionel Sambuc if (getGeneration() >= SOUTHERN_ISLANDS) {
130f4a2713aSLionel Sambuc
131*0a6a1f1dSLionel Sambuc // Track register pressure so the scheduler can try to decrease
132*0a6a1f1dSLionel Sambuc // pressure once register usage is above the threshold defined by
133*0a6a1f1dSLionel Sambuc // SIRegisterInfo::getRegPressureSetLimit()
134*0a6a1f1dSLionel Sambuc Policy.ShouldTrackPressure = true;
135*0a6a1f1dSLionel Sambuc
136*0a6a1f1dSLionel Sambuc // Enabling both top down and bottom up scheduling seems to give us less
137*0a6a1f1dSLionel Sambuc // register spills than just using one of these approaches on its own.
138*0a6a1f1dSLionel Sambuc Policy.OnlyTopDown = false;
139*0a6a1f1dSLionel Sambuc Policy.OnlyBottomUp = false;
140*0a6a1f1dSLionel Sambuc }
141f4a2713aSLionel Sambuc }
142