1 #include "clang/Basic/Cuda.h" 2 3 #include "llvm/ADT/StringRef.h" 4 #include "llvm/ADT/Twine.h" 5 #include "llvm/Support/ErrorHandling.h" 6 #include "llvm/Support/VersionTuple.h" 7 8 namespace clang { 9 10 struct CudaVersionMapEntry { 11 const char *Name; 12 CudaVersion Version; 13 llvm::VersionTuple TVersion; 14 }; 15 #define CUDA_ENTRY(major, minor) \ 16 { \ 17 #major "." #minor, CudaVersion::CUDA_##major##minor, \ 18 llvm::VersionTuple(major, minor) \ 19 } 20 21 static const CudaVersionMapEntry CudaNameVersionMap[] = { 22 CUDA_ENTRY(7, 0), 23 CUDA_ENTRY(7, 5), 24 CUDA_ENTRY(8, 0), 25 CUDA_ENTRY(9, 0), 26 CUDA_ENTRY(9, 1), 27 CUDA_ENTRY(9, 2), 28 CUDA_ENTRY(10, 0), 29 CUDA_ENTRY(10, 1), 30 CUDA_ENTRY(10, 2), 31 CUDA_ENTRY(11, 0), 32 CUDA_ENTRY(11, 1), 33 CUDA_ENTRY(11, 2), 34 CUDA_ENTRY(11, 3), 35 CUDA_ENTRY(11, 4), 36 CUDA_ENTRY(11, 5), 37 CUDA_ENTRY(11, 6), 38 CUDA_ENTRY(11, 7), 39 CUDA_ENTRY(11, 8), 40 CUDA_ENTRY(12, 0), 41 CUDA_ENTRY(12, 1), 42 CUDA_ENTRY(12, 2), 43 CUDA_ENTRY(12, 3), 44 CUDA_ENTRY(12, 4), 45 CUDA_ENTRY(12, 5), 46 CUDA_ENTRY(12, 6), 47 CUDA_ENTRY(12, 7), 48 CUDA_ENTRY(12, 8), 49 CUDA_ENTRY(12, 9), 50 {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())}, 51 {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. 52 }; 53 #undef CUDA_ENTRY 54 55 const char *CudaVersionToString(CudaVersion V) { 56 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) 57 if (I->Version == V) 58 return I->Name; 59 60 return CudaVersionToString(CudaVersion::UNKNOWN); 61 } 62 63 CudaVersion CudaStringToVersion(const llvm::Twine &S) { 64 std::string VS = S.str(); 65 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) 66 if (I->Name == VS) 67 return I->Version; 68 return CudaVersion::UNKNOWN; 69 } 70 71 CudaVersion ToCudaVersion(llvm::VersionTuple Version) { 72 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) 73 if (I->TVersion == Version) 74 return I->Version; 75 return CudaVersion::UNKNOWN; 76 } 77 78 namespace { 79 struct OffloadArchToStringMap { 80 OffloadArch arch; 81 const char *arch_name; 82 const char *virtual_arch_name; 83 }; 84 } // namespace 85 86 #define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} 87 #define SM(sm) SM2(sm, "compute_" #sm) 88 #define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} 89 static const OffloadArchToStringMap arch_names[] = { 90 // clang-format off 91 {OffloadArch::UNUSED, "", ""}, 92 SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi 93 SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler 94 SM(50), SM(52), SM(53), // Maxwell 95 SM(60), SM(61), SM(62), // Pascal 96 SM(70), SM(72), // Volta 97 SM(75), // Turing 98 SM(80), SM(86), // Ampere 99 SM(87), // Jetson/Drive AGX Orin 100 SM(89), // Ada Lovelace 101 SM(90), // Hopper 102 SM(90a), // Hopper 103 SM(100), // Blackwell 104 SM(100a), // Blackwell 105 GFX(600), // gfx600 106 GFX(601), // gfx601 107 GFX(602), // gfx602 108 GFX(700), // gfx700 109 GFX(701), // gfx701 110 GFX(702), // gfx702 111 GFX(703), // gfx703 112 GFX(704), // gfx704 113 GFX(705), // gfx705 114 GFX(801), // gfx801 115 GFX(802), // gfx802 116 GFX(803), // gfx803 117 GFX(805), // gfx805 118 GFX(810), // gfx810 119 {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"}, 120 GFX(900), // gfx900 121 GFX(902), // gfx902 122 GFX(904), // gfx903 123 GFX(906), // gfx906 124 GFX(908), // gfx908 125 GFX(909), // gfx909 126 GFX(90a), // gfx90a 127 GFX(90c), // gfx90c 128 
    {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
    GFX(940),  // gfx940
    GFX(941),  // gfx941
    GFX(942),  // gfx942
    GFX(950),  // gfx950
    {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
    GFX(1010), // gfx1010
    GFX(1011), // gfx1011
    GFX(1012), // gfx1012
    GFX(1013), // gfx1013
    {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
    GFX(1030), // gfx1030
    GFX(1031), // gfx1031
    GFX(1032), // gfx1032
    GFX(1033), // gfx1033
    GFX(1034), // gfx1034
    GFX(1035), // gfx1035
    GFX(1036), // gfx1036
    {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
    GFX(1100), // gfx1100
    GFX(1101), // gfx1101
    GFX(1102), // gfx1102
    GFX(1103), // gfx1103
    GFX(1150), // gfx1150
    GFX(1151), // gfx1151
    GFX(1152), // gfx1152
    GFX(1153), // gfx1153
    {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
    GFX(1200), // gfx1200
    GFX(1201), // gfx1201
    {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
    {OffloadArch::Generic, "generic", ""},
    // clang-format on
};
#undef SM
#undef SM2
#undef GFX

const char *OffloadArchToString(OffloadArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->arch_name;
}

const char *OffloadArchToVirtualArchString(OffloadArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->virtual_arch_name;
}

OffloadArch StringToOffloadArch(llvm::StringRef S) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
  if (result == std::end(arch_names))
    return OffloadArch::UNKNOWN;
  return result->arch;
}

CudaVersion MinVersionForOffloadArch(OffloadArch A) {
  if (A == OffloadArch::UNKNOWN)
    return CudaVersion::UNKNOWN;

  // AMD GPUs do not depend on CUDA versions.
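  // CUDA_70 is the oldest version in CudaNameVersionMap above, so returning
  // it as the minimum effectively accepts any supported CUDA installation
  // for AMD offload targets.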
  if (IsAMDOffloadArch(A))
    return CudaVersion::CUDA_70;

  switch (A) {
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
  case OffloadArch::SM_50:
  case OffloadArch::SM_52:
  case OffloadArch::SM_53:
    return CudaVersion::CUDA_70;
  case OffloadArch::SM_60:
  case OffloadArch::SM_61:
  case OffloadArch::SM_62:
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_70:
    return CudaVersion::CUDA_90;
  case OffloadArch::SM_72:
    return CudaVersion::CUDA_91;
  case OffloadArch::SM_75:
    return CudaVersion::CUDA_100;
  case OffloadArch::SM_80:
    return CudaVersion::CUDA_110;
  case OffloadArch::SM_86:
    return CudaVersion::CUDA_111;
  case OffloadArch::SM_87:
    return CudaVersion::CUDA_114;
  case OffloadArch::SM_89:
  case OffloadArch::SM_90:
    return CudaVersion::CUDA_118;
  case OffloadArch::SM_90a:
    return CudaVersion::CUDA_120;
  case OffloadArch::SM_100:
  case OffloadArch::SM_100a:
    return CudaVersion::CUDA_127;
  default:
    llvm_unreachable("invalid enum");
  }
}

CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDOffloadArch(A))
    return CudaVersion::NEW;

  switch (A) {
  case OffloadArch::UNKNOWN:
    return CudaVersion::UNKNOWN;
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
    return CudaVersion::CUDA_102;
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
    return CudaVersion::CUDA_118;
  default:
    return CudaVersion::NEW;
  }
}

bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) {
  return CudaFeatureEnabled(ToCudaVersion(Version), Feature);
}

bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) {
  switch (Feature) {
  case CudaFeature::CUDA_USES_NEW_LAUNCH:
    return Version >= CudaVersion::CUDA_92;
  case CudaFeature::CUDA_USES_FATBIN_REGISTER_END:
    return Version >= CudaVersion::CUDA_101;
  }
  llvm_unreachable("Unknown CUDA feature.");
}
} // namespace clang
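
// A minimal usage sketch of how the helpers above compose when validating a
// user-supplied arch/toolkit pairing. Illustrative only: the input strings
// "sm_90" and "12.1" are hypothetical driver inputs, not part of this file.
//
//   OffloadArch Arch = clang::StringToOffloadArch("sm_90");
//   CudaVersion Installed = clang::CudaStringToVersion("12.1");
//   if (Arch != OffloadArch::UNKNOWN &&
//       Installed >= clang::MinVersionForOffloadArch(Arch) &&
//       Installed <= clang::MaxVersionForOffloadArch(Arch)) {
//     // "sm_90" (virtual arch "compute_90") is usable with CUDA 12.1.
//   }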