1 //===-- TargetParser - Parser for target features ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a target parser to recognise hardware features such as 10 // FPU/CPU/ARCH names as well as specific support such as HDIV, etc. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/TargetParser/TargetParser.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/TargetParser/Triple.h" 17 18 using namespace llvm; 19 using namespace AMDGPU; 20 21 namespace { 22 23 struct GPUInfo { 24 StringLiteral Name; 25 StringLiteral CanonicalName; 26 AMDGPU::GPUKind Kind; 27 unsigned Features; 28 }; 29 30 constexpr GPUInfo R600GPUs[] = { 31 // Name Canonical Kind Features 32 // Name 33 {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE }, 34 {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE }, 35 {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE }, 36 {{"r630"}, {"r630"}, GK_R630, FEATURE_NONE }, 37 {{"rs780"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 38 {{"rs880"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 39 {{"rv610"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 40 {{"rv620"}, {"rs880"}, GK_RS880, FEATURE_NONE }, 41 {{"rv670"}, {"rv670"}, GK_RV670, FEATURE_NONE }, 42 {{"rv710"}, {"rv710"}, GK_RV710, FEATURE_NONE }, 43 {{"rv730"}, {"rv730"}, GK_RV730, FEATURE_NONE }, 44 {{"rv740"}, {"rv770"}, GK_RV770, FEATURE_NONE }, 45 {{"rv770"}, {"rv770"}, GK_RV770, FEATURE_NONE }, 46 {{"cedar"}, {"cedar"}, GK_CEDAR, FEATURE_NONE }, 47 {{"palm"}, {"cedar"}, GK_CEDAR, FEATURE_NONE }, 48 {{"cypress"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA }, 49 {{"hemlock"}, {"cypress"}, GK_CYPRESS, FEATURE_FMA }, 50 {{"juniper"}, {"juniper"}, GK_JUNIPER, FEATURE_NONE }, 51 {{"redwood"}, {"redwood"}, GK_REDWOOD, FEATURE_NONE }, 52 {{"sumo"}, {"sumo"}, GK_SUMO, FEATURE_NONE }, 53 {{"sumo2"}, {"sumo"}, GK_SUMO, FEATURE_NONE }, 54 {{"barts"}, {"barts"}, GK_BARTS, FEATURE_NONE }, 55 {{"caicos"}, {"caicos"}, GK_CAICOS, FEATURE_NONE }, 56 {{"aruba"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA }, 57 {{"cayman"}, {"cayman"}, GK_CAYMAN, FEATURE_FMA }, 58 {{"turks"}, {"turks"}, GK_TURKS, FEATURE_NONE } 59 }; 60 61 // This table should be sorted by the value of GPUKind 62 // Don't bother listing the implicitly true features 63 constexpr GPUInfo AMDGCNGPUs[] = { 64 // clang-format off 65 // Name Canonical Kind Features 66 // Name 67 {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, 68 {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, 69 {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, 70 {{"pitcairn"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, 71 {{"verde"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, 72 {{"gfx602"}, {"gfx602"}, GK_GFX602, FEATURE_NONE}, 73 {{"hainan"}, {"gfx602"}, GK_GFX602, FEATURE_NONE}, 74 {{"oland"}, {"gfx602"}, GK_GFX602, FEATURE_NONE}, 75 {{"gfx700"}, {"gfx700"}, GK_GFX700, FEATURE_NONE}, 76 {{"kaveri"}, {"gfx700"}, GK_GFX700, FEATURE_NONE}, 77 {{"gfx701"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32}, 78 {{"hawaii"}, {"gfx701"}, GK_GFX701, FEATURE_FAST_FMA_F32}, 79 {{"gfx702"}, {"gfx702"}, GK_GFX702, FEATURE_FAST_FMA_F32}, 80 {{"gfx703"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, 81 {{"kabini"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, 82 {{"mullins"}, {"gfx703"}, GK_GFX703, FEATURE_NONE}, 83 {{"gfx704"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, 84 {{"bonaire"}, {"gfx704"}, GK_GFX704, FEATURE_NONE}, 85 {{"gfx705"}, {"gfx705"}, GK_GFX705, FEATURE_NONE}, 86 {{"gfx801"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 87 {{"carrizo"}, {"gfx801"}, GK_GFX801, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 88 {{"gfx802"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, 89 {{"iceland"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, 90 {{"tonga"}, {"gfx802"}, GK_GFX802, FEATURE_FAST_DENORMAL_F32}, 91 {{"gfx803"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 92 {{"fiji"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 93 {{"polaris10"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 94 {{"polaris11"}, {"gfx803"}, GK_GFX803, FEATURE_FAST_DENORMAL_F32}, 95 {{"gfx805"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32}, 96 {{"tongapro"}, {"gfx805"}, GK_GFX805, FEATURE_FAST_DENORMAL_F32}, 97 {{"gfx810"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 98 {{"stoney"}, {"gfx810"}, GK_GFX810, FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 99 {{"gfx900"}, {"gfx900"}, GK_GFX900, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 100 {{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 101 {{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 102 {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 103 {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 104 {{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 105 {{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 106 {{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 107 {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 108 {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 109 {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 110 {{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 111 {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 112 {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 113 {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 114 {{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 115 {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 116 {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 117 {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 118 {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 119 {{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 120 {{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 121 {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 122 {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 123 {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 124 {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 125 {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 126 {{"gfx1150"}, {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 127 {{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 128 {{"gfx1152"}, {"gfx1152"}, GK_GFX1152, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 129 {{"gfx1153"}, {"gfx1153"}, GK_GFX1153, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 130 {{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 131 {{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 132 133 {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK}, 134 {{"gfx10-1-generic"}, {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP}, 135 {{"gfx10-3-generic"}, {"gfx10-3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 136 {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 137 {{"gfx12-generic"}, {"gfx12-generic"}, GK_GFX12_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP}, 138 {{"gfx9-4-generic"}, {"gfx9-4-generic"}, GK_GFX9_4_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC}, 139 // clang-format on 140 }; 141 142 const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) { 143 GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE }; 144 145 auto I = 146 llvm::lower_bound(Table, Search, [](const GPUInfo &A, const GPUInfo &B) { 147 return A.Kind < B.Kind; 148 }); 149 150 if (I == Table.end() || I->Kind != Search.Kind) 151 return nullptr; 152 return I; 153 } 154 155 } // namespace 156 157 StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) { 158 switch (AK) { 159 case AMDGPU::GK_GFX9_GENERIC: 160 case AMDGPU::GK_GFX9_4_GENERIC: 161 return "gfx9"; 162 case AMDGPU::GK_GFX10_1_GENERIC: 163 case AMDGPU::GK_GFX10_3_GENERIC: 164 return "gfx10"; 165 case AMDGPU::GK_GFX11_GENERIC: 166 return "gfx11"; 167 case AMDGPU::GK_GFX12_GENERIC: 168 return "gfx12"; 169 default: { 170 StringRef ArchName = getArchNameAMDGCN(AK); 171 return ArchName.empty() ? "" : ArchName.drop_back(2); 172 } 173 } 174 } 175 176 StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) { 177 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) 178 return Entry->CanonicalName; 179 return ""; 180 } 181 182 StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) { 183 if (const auto *Entry = getArchEntry(AK, R600GPUs)) 184 return Entry->CanonicalName; 185 return ""; 186 } 187 188 AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) { 189 for (const auto &C : AMDGCNGPUs) { 190 if (CPU == C.Name) 191 return C.Kind; 192 } 193 194 return AMDGPU::GPUKind::GK_NONE; 195 } 196 197 AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) { 198 for (const auto &C : R600GPUs) { 199 if (CPU == C.Name) 200 return C.Kind; 201 } 202 203 return AMDGPU::GPUKind::GK_NONE; 204 } 205 206 unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) { 207 if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) 208 return Entry->Features; 209 return FEATURE_NONE; 210 } 211 212 unsigned AMDGPU::getArchAttrR600(GPUKind AK) { 213 if (const auto *Entry = getArchEntry(AK, R600GPUs)) 214 return Entry->Features; 215 return FEATURE_NONE; 216 } 217 218 void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) { 219 // XXX: Should this only report unique canonical names? 220 for (const auto &C : AMDGCNGPUs) 221 Values.push_back(C.Name); 222 } 223 224 void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) { 225 for (const auto &C : R600GPUs) 226 Values.push_back(C.Name); 227 } 228 229 AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { 230 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); 231 if (AK == AMDGPU::GPUKind::GK_NONE) { 232 if (GPU == "generic-hsa") 233 return {7, 0, 0}; 234 if (GPU == "generic") 235 return {6, 0, 0}; 236 return {0, 0, 0}; 237 } 238 239 // clang-format off 240 switch (AK) { 241 case GK_GFX600: return {6, 0, 0}; 242 case GK_GFX601: return {6, 0, 1}; 243 case GK_GFX602: return {6, 0, 2}; 244 case GK_GFX700: return {7, 0, 0}; 245 case GK_GFX701: return {7, 0, 1}; 246 case GK_GFX702: return {7, 0, 2}; 247 case GK_GFX703: return {7, 0, 3}; 248 case GK_GFX704: return {7, 0, 4}; 249 case GK_GFX705: return {7, 0, 5}; 250 case GK_GFX801: return {8, 0, 1}; 251 case GK_GFX802: return {8, 0, 2}; 252 case GK_GFX803: return {8, 0, 3}; 253 case GK_GFX805: return {8, 0, 5}; 254 case GK_GFX810: return {8, 1, 0}; 255 case GK_GFX900: return {9, 0, 0}; 256 case GK_GFX902: return {9, 0, 2}; 257 case GK_GFX904: return {9, 0, 4}; 258 case GK_GFX906: return {9, 0, 6}; 259 case GK_GFX908: return {9, 0, 8}; 260 case GK_GFX909: return {9, 0, 9}; 261 case GK_GFX90A: return {9, 0, 10}; 262 case GK_GFX90C: return {9, 0, 12}; 263 case GK_GFX940: return {9, 4, 0}; 264 case GK_GFX941: return {9, 4, 1}; 265 case GK_GFX942: return {9, 4, 2}; 266 case GK_GFX950: return {9, 5, 0}; 267 case GK_GFX1010: return {10, 1, 0}; 268 case GK_GFX1011: return {10, 1, 1}; 269 case GK_GFX1012: return {10, 1, 2}; 270 case GK_GFX1013: return {10, 1, 3}; 271 case GK_GFX1030: return {10, 3, 0}; 272 case GK_GFX1031: return {10, 3, 1}; 273 case GK_GFX1032: return {10, 3, 2}; 274 case GK_GFX1033: return {10, 3, 3}; 275 case GK_GFX1034: return {10, 3, 4}; 276 case GK_GFX1035: return {10, 3, 5}; 277 case GK_GFX1036: return {10, 3, 6}; 278 case GK_GFX1100: return {11, 0, 0}; 279 case GK_GFX1101: return {11, 0, 1}; 280 case GK_GFX1102: return {11, 0, 2}; 281 case GK_GFX1103: return {11, 0, 3}; 282 case GK_GFX1150: return {11, 5, 0}; 283 case GK_GFX1151: return {11, 5, 1}; 284 case GK_GFX1152: return {11, 5, 2}; 285 case GK_GFX1153: return {11, 5, 3}; 286 case GK_GFX1200: return {12, 0, 0}; 287 case GK_GFX1201: return {12, 0, 1}; 288 289 // Generic targets return the lowest common denominator 290 // within their family. That is, the ISA that is the most 291 // restricted in terms of features. 292 // 293 // gfx9-generic is tricky because there is no lowest 294 // common denominator, so we return gfx900 which has mad-mix 295 // but this family doesn't have it. 296 // 297 // This API should never be used to check for a particular 298 // feature anyway. 299 // 300 // TODO: Split up this API depending on its caller so 301 // generic target handling is more obvious and less risky. 302 case GK_GFX9_GENERIC: return {9, 0, 0}; 303 case GK_GFX9_4_GENERIC: return {9, 4, 0}; 304 case GK_GFX10_1_GENERIC: return {10, 1, 0}; 305 case GK_GFX10_3_GENERIC: return {10, 3, 0}; 306 case GK_GFX11_GENERIC: return {11, 0, 3}; 307 case GK_GFX12_GENERIC: return {12, 0, 0}; 308 default: return {0, 0, 0}; 309 } 310 // clang-format on 311 } 312 313 StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) { 314 assert(T.isAMDGPU()); 315 auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(Arch) : parseArchR600(Arch); 316 if (ProcKind == GK_NONE) 317 return StringRef(); 318 319 return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind); 320 } 321 322 void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, 323 StringMap<bool> &Features) { 324 // XXX - What does the member GPU mean if device name string passed here? 325 if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) { 326 // AMDGCN SPIRV must support the union of all AMDGCN features. This list 327 // should be kept in sorted order and updated whenever new features are 328 // added. 329 Features["16-bit-insts"] = true; 330 Features["ashr-pk-insts"] = true; 331 Features["atomic-buffer-pk-add-bf16-inst"] = true; 332 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 333 Features["atomic-ds-pk-add-16-insts"] = true; 334 Features["atomic-fadd-rtn-insts"] = true; 335 Features["atomic-flat-pk-add-16-insts"] = true; 336 Features["atomic-global-pk-add-bf16-inst"] = true; 337 Features["bf8-cvt-scale-insts"] = true; 338 Features["bitop3-insts"] = true; 339 Features["ci-insts"] = true; 340 Features["dl-insts"] = true; 341 Features["dot1-insts"] = true; 342 Features["dot2-insts"] = true; 343 Features["dot3-insts"] = true; 344 Features["dot4-insts"] = true; 345 Features["dot5-insts"] = true; 346 Features["dot6-insts"] = true; 347 Features["dot7-insts"] = true; 348 Features["dot8-insts"] = true; 349 Features["dot9-insts"] = true; 350 Features["dot10-insts"] = true; 351 Features["dot11-insts"] = true; 352 Features["dot12-insts"] = true; 353 Features["dot13-insts"] = true; 354 Features["dpp"] = true; 355 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; 356 Features["f32-to-f16bf16-cvt-sr-insts"] = true; 357 Features["fp4-cvt-scale-insts"] = true; 358 Features["fp6bf6-cvt-scale-insts"] = true; 359 Features["fp8-insts"] = true; 360 Features["fp8-conversion-insts"] = true; 361 Features["fp8-cvt-scale-insts"] = true; 362 Features["gfx8-insts"] = true; 363 Features["gfx9-insts"] = true; 364 Features["gfx90a-insts"] = true; 365 Features["gfx940-insts"] = true; 366 Features["gfx950-insts"] = true; 367 Features["gfx10-insts"] = true; 368 Features["gfx10-3-insts"] = true; 369 Features["gfx11-insts"] = true; 370 Features["gfx12-insts"] = true; 371 Features["gws"] = true; 372 Features["image-insts"] = true; 373 Features["s-memrealtime"] = true; 374 Features["s-memtime-inst"] = true; 375 Features["mai-insts"] = true; 376 Features["permlane16-swap"] = true; 377 Features["permlane32-swap"] = true; 378 Features["prng-inst"] = true; 379 Features["wavefrontsize32"] = true; 380 Features["wavefrontsize64"] = true; 381 } else if (T.isAMDGCN()) { 382 AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); 383 switch (Kind) { 384 case GK_GFX1201: 385 case GK_GFX1200: 386 case GK_GFX12_GENERIC: 387 Features["ci-insts"] = true; 388 Features["dot7-insts"] = true; 389 Features["dot8-insts"] = true; 390 Features["dot9-insts"] = true; 391 Features["dot10-insts"] = true; 392 Features["dot11-insts"] = true; 393 Features["dot12-insts"] = true; 394 Features["dl-insts"] = true; 395 Features["atomic-ds-pk-add-16-insts"] = true; 396 Features["atomic-flat-pk-add-16-insts"] = true; 397 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 398 Features["atomic-buffer-pk-add-bf16-inst"] = true; 399 Features["atomic-global-pk-add-bf16-inst"] = true; 400 Features["16-bit-insts"] = true; 401 Features["dpp"] = true; 402 Features["gfx8-insts"] = true; 403 Features["gfx9-insts"] = true; 404 Features["gfx10-insts"] = true; 405 Features["gfx10-3-insts"] = true; 406 Features["gfx11-insts"] = true; 407 Features["gfx12-insts"] = true; 408 Features["atomic-fadd-rtn-insts"] = true; 409 Features["image-insts"] = true; 410 Features["fp8-conversion-insts"] = true; 411 break; 412 case GK_GFX1153: 413 case GK_GFX1152: 414 case GK_GFX1151: 415 case GK_GFX1150: 416 case GK_GFX1103: 417 case GK_GFX1102: 418 case GK_GFX1101: 419 case GK_GFX1100: 420 case GK_GFX11_GENERIC: 421 Features["ci-insts"] = true; 422 Features["dot5-insts"] = true; 423 Features["dot7-insts"] = true; 424 Features["dot8-insts"] = true; 425 Features["dot9-insts"] = true; 426 Features["dot10-insts"] = true; 427 Features["dot12-insts"] = true; 428 Features["dl-insts"] = true; 429 Features["16-bit-insts"] = true; 430 Features["dpp"] = true; 431 Features["gfx8-insts"] = true; 432 Features["gfx9-insts"] = true; 433 Features["gfx10-insts"] = true; 434 Features["gfx10-3-insts"] = true; 435 Features["gfx11-insts"] = true; 436 Features["atomic-fadd-rtn-insts"] = true; 437 Features["image-insts"] = true; 438 Features["gws"] = true; 439 break; 440 case GK_GFX1036: 441 case GK_GFX1035: 442 case GK_GFX1034: 443 case GK_GFX1033: 444 case GK_GFX1032: 445 case GK_GFX1031: 446 case GK_GFX1030: 447 case GK_GFX10_3_GENERIC: 448 Features["ci-insts"] = true; 449 Features["dot1-insts"] = true; 450 Features["dot2-insts"] = true; 451 Features["dot5-insts"] = true; 452 Features["dot6-insts"] = true; 453 Features["dot7-insts"] = true; 454 Features["dot10-insts"] = true; 455 Features["dl-insts"] = true; 456 Features["16-bit-insts"] = true; 457 Features["dpp"] = true; 458 Features["gfx8-insts"] = true; 459 Features["gfx9-insts"] = true; 460 Features["gfx10-insts"] = true; 461 Features["gfx10-3-insts"] = true; 462 Features["image-insts"] = true; 463 Features["s-memrealtime"] = true; 464 Features["s-memtime-inst"] = true; 465 Features["gws"] = true; 466 break; 467 case GK_GFX1012: 468 case GK_GFX1011: 469 Features["dot1-insts"] = true; 470 Features["dot2-insts"] = true; 471 Features["dot5-insts"] = true; 472 Features["dot6-insts"] = true; 473 Features["dot7-insts"] = true; 474 Features["dot10-insts"] = true; 475 [[fallthrough]]; 476 case GK_GFX1013: 477 case GK_GFX1010: 478 case GK_GFX10_1_GENERIC: 479 Features["dl-insts"] = true; 480 Features["ci-insts"] = true; 481 Features["16-bit-insts"] = true; 482 Features["dpp"] = true; 483 Features["gfx8-insts"] = true; 484 Features["gfx9-insts"] = true; 485 Features["gfx10-insts"] = true; 486 Features["image-insts"] = true; 487 Features["s-memrealtime"] = true; 488 Features["s-memtime-inst"] = true; 489 Features["gws"] = true; 490 break; 491 case GK_GFX950: 492 Features["bitop3-insts"] = true; 493 Features["fp6bf6-cvt-scale-insts"] = true; 494 Features["fp4-cvt-scale-insts"] = true; 495 Features["bf8-cvt-scale-insts"] = true; 496 Features["fp8-cvt-scale-insts"] = true; 497 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; 498 Features["f32-to-f16bf16-cvt-sr-insts"] = true; 499 Features["prng-inst"] = true; 500 Features["permlane16-swap"] = true; 501 Features["permlane32-swap"] = true; 502 Features["ashr-pk-insts"] = true; 503 Features["dot12-insts"] = true; 504 Features["dot13-insts"] = true; 505 Features["atomic-buffer-pk-add-bf16-inst"] = true; 506 Features["gfx950-insts"] = true; 507 [[fallthrough]]; 508 case GK_GFX942: 509 case GK_GFX941: 510 case GK_GFX940: 511 Features["fp8-insts"] = true; 512 Features["fp8-conversion-insts"] = true; 513 if (Kind != GK_GFX950) 514 Features["xf32-insts"] = true; 515 [[fallthrough]]; 516 case GK_GFX9_4_GENERIC: 517 Features["gfx940-insts"] = true; 518 Features["atomic-ds-pk-add-16-insts"] = true; 519 Features["atomic-flat-pk-add-16-insts"] = true; 520 Features["atomic-global-pk-add-bf16-inst"] = true; 521 Features["gfx90a-insts"] = true; 522 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 523 Features["atomic-fadd-rtn-insts"] = true; 524 Features["dot3-insts"] = true; 525 Features["dot4-insts"] = true; 526 Features["dot5-insts"] = true; 527 Features["dot6-insts"] = true; 528 Features["mai-insts"] = true; 529 Features["dl-insts"] = true; 530 Features["dot1-insts"] = true; 531 Features["dot2-insts"] = true; 532 Features["dot7-insts"] = true; 533 Features["dot10-insts"] = true; 534 Features["gfx9-insts"] = true; 535 Features["gfx8-insts"] = true; 536 Features["16-bit-insts"] = true; 537 Features["dpp"] = true; 538 Features["s-memrealtime"] = true; 539 Features["ci-insts"] = true; 540 Features["s-memtime-inst"] = true; 541 Features["gws"] = true; 542 break; 543 case GK_GFX90A: 544 Features["gfx90a-insts"] = true; 545 Features["atomic-buffer-global-pk-add-f16-insts"] = true; 546 Features["atomic-fadd-rtn-insts"] = true; 547 [[fallthrough]]; 548 case GK_GFX908: 549 Features["dot3-insts"] = true; 550 Features["dot4-insts"] = true; 551 Features["dot5-insts"] = true; 552 Features["dot6-insts"] = true; 553 Features["mai-insts"] = true; 554 [[fallthrough]]; 555 case GK_GFX906: 556 Features["dl-insts"] = true; 557 Features["dot1-insts"] = true; 558 Features["dot2-insts"] = true; 559 Features["dot7-insts"] = true; 560 Features["dot10-insts"] = true; 561 [[fallthrough]]; 562 case GK_GFX90C: 563 case GK_GFX909: 564 case GK_GFX904: 565 case GK_GFX902: 566 case GK_GFX900: 567 case GK_GFX9_GENERIC: 568 Features["gfx9-insts"] = true; 569 [[fallthrough]]; 570 case GK_GFX810: 571 case GK_GFX805: 572 case GK_GFX803: 573 case GK_GFX802: 574 case GK_GFX801: 575 Features["gfx8-insts"] = true; 576 Features["16-bit-insts"] = true; 577 Features["dpp"] = true; 578 Features["s-memrealtime"] = true; 579 [[fallthrough]]; 580 case GK_GFX705: 581 case GK_GFX704: 582 case GK_GFX703: 583 case GK_GFX702: 584 case GK_GFX701: 585 case GK_GFX700: 586 Features["ci-insts"] = true; 587 [[fallthrough]]; 588 case GK_GFX602: 589 case GK_GFX601: 590 case GK_GFX600: 591 Features["image-insts"] = true; 592 Features["s-memtime-inst"] = true; 593 Features["gws"] = true; 594 break; 595 case GK_NONE: 596 break; 597 default: 598 llvm_unreachable("Unhandled GPU!"); 599 } 600 } else { 601 if (GPU.empty()) 602 GPU = "r600"; 603 604 switch (llvm::AMDGPU::parseArchR600(GPU)) { 605 case GK_CAYMAN: 606 case GK_CYPRESS: 607 case GK_RV770: 608 case GK_RV670: 609 // TODO: Add fp64 when implemented. 610 break; 611 case GK_TURKS: 612 case GK_CAICOS: 613 case GK_BARTS: 614 case GK_SUMO: 615 case GK_REDWOOD: 616 case GK_JUNIPER: 617 case GK_CEDAR: 618 case GK_RV730: 619 case GK_RV710: 620 case GK_RS880: 621 case GK_R630: 622 case GK_R600: 623 break; 624 default: 625 llvm_unreachable("Unhandled GPU!"); 626 } 627 } 628 } 629 630 static bool isWave32Capable(StringRef GPU, const Triple &T) { 631 bool IsWave32Capable = false; 632 // XXX - What does the member GPU mean if device name string passed here? 633 if (T.isAMDGCN()) { 634 switch (parseArchAMDGCN(GPU)) { 635 case GK_GFX1201: 636 case GK_GFX1200: 637 case GK_GFX1153: 638 case GK_GFX1152: 639 case GK_GFX1151: 640 case GK_GFX1150: 641 case GK_GFX1103: 642 case GK_GFX1102: 643 case GK_GFX1101: 644 case GK_GFX1100: 645 case GK_GFX1036: 646 case GK_GFX1035: 647 case GK_GFX1034: 648 case GK_GFX1033: 649 case GK_GFX1032: 650 case GK_GFX1031: 651 case GK_GFX1030: 652 case GK_GFX1012: 653 case GK_GFX1011: 654 case GK_GFX1013: 655 case GK_GFX1010: 656 case GK_GFX12_GENERIC: 657 case GK_GFX11_GENERIC: 658 case GK_GFX10_3_GENERIC: 659 case GK_GFX10_1_GENERIC: 660 IsWave32Capable = true; 661 break; 662 default: 663 break; 664 } 665 } 666 return IsWave32Capable; 667 } 668 669 std::pair<FeatureError, StringRef> 670 AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T, 671 StringMap<bool> &Features) { 672 bool IsWave32Capable = isWave32Capable(GPU, T); 673 const bool IsNullGPU = GPU.empty(); 674 const bool HaveWave32 = Features.count("wavefrontsize32"); 675 const bool HaveWave64 = Features.count("wavefrontsize64"); 676 if (HaveWave32 && HaveWave64) { 677 return {AMDGPU::INVALID_FEATURE_COMBINATION, 678 "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; 679 } 680 if (HaveWave32 && !IsNullGPU && !IsWave32Capable) { 681 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; 682 } 683 // Don't assume any wavesize with an unknown subtarget. 684 if (!IsNullGPU) { 685 // Default to wave32 if available, or wave64 if not 686 if (!HaveWave32 && !HaveWave64) { 687 StringRef DefaultWaveSizeFeature = 688 IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64"; 689 Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); 690 } 691 } 692 return {NO_ERROR, StringRef()}; 693 } 694