1 #include "clang/Basic/Cuda.h" 2 3 #include "llvm/ADT/StringRef.h" 4 #include "llvm/ADT/Twine.h" 5 #include "llvm/Support/ErrorHandling.h" 6 #include "llvm/Support/VersionTuple.h" 7 8 namespace clang { 9 10 struct CudaVersionMapEntry { 11 const char *Name; 12 CudaVersion Version; 13 llvm::VersionTuple TVersion; 14 }; 15 #define CUDA_ENTRY(major, minor) \ 16 { \ 17 #major "." #minor, CudaVersion::CUDA_##major##minor, \ 18 llvm::VersionTuple(major, minor) \ 19 } 20 21 static const CudaVersionMapEntry CudaNameVersionMap[] = { 22 CUDA_ENTRY(7, 0), 23 CUDA_ENTRY(7, 5), 24 CUDA_ENTRY(8, 0), 25 CUDA_ENTRY(9, 0), 26 CUDA_ENTRY(9, 1), 27 CUDA_ENTRY(9, 2), 28 CUDA_ENTRY(10, 0), 29 CUDA_ENTRY(10, 1), 30 CUDA_ENTRY(10, 2), 31 CUDA_ENTRY(11, 0), 32 CUDA_ENTRY(11, 1), 33 CUDA_ENTRY(11, 2), 34 CUDA_ENTRY(11, 3), 35 CUDA_ENTRY(11, 4), 36 CUDA_ENTRY(11, 5), 37 CUDA_ENTRY(11, 6), 38 CUDA_ENTRY(11, 7), 39 CUDA_ENTRY(11, 8), 40 CUDA_ENTRY(12, 0), 41 CUDA_ENTRY(12, 1), 42 CUDA_ENTRY(12, 2), 43 CUDA_ENTRY(12, 3), 44 CUDA_ENTRY(12, 4), 45 CUDA_ENTRY(12, 5), 46 CUDA_ENTRY(12, 6), 47 CUDA_ENTRY(12, 8), 48 {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())}, 49 {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. 50 }; 51 #undef CUDA_ENTRY 52 53 const char *CudaVersionToString(CudaVersion V) { 54 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) 55 if (I->Version == V) 56 return I->Name; 57 58 return CudaVersionToString(CudaVersion::UNKNOWN); 59 } 60 61 CudaVersion CudaStringToVersion(const llvm::Twine &S) { 62 std::string VS = S.str(); 63 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) 64 if (I->Name == VS) 65 return I->Version; 66 return CudaVersion::UNKNOWN; 67 } 68 69 CudaVersion ToCudaVersion(llvm::VersionTuple Version) { 70 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) 71 if (I->TVersion == Version) 72 return I->Version; 73 return CudaVersion::UNKNOWN; 74 } 75 76 namespace { 77 struct OffloadArchToStringMap { 78 OffloadArch arch; 79 const char *arch_name; 80 const char *virtual_arch_name; 81 }; 82 } // namespace 83 84 #define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} 85 #define SM(sm) SM2(sm, "compute_" #sm) 86 #define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} 87 static const OffloadArchToStringMap arch_names[] = { 88 // clang-format off 89 {OffloadArch::UNUSED, "", ""}, 90 SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi 91 SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler 92 SM(50), SM(52), SM(53), // Maxwell 93 SM(60), SM(61), SM(62), // Pascal 94 SM(70), SM(72), // Volta 95 SM(75), // Turing 96 SM(80), SM(86), // Ampere 97 SM(87), // Jetson/Drive AGX Orin 98 SM(89), // Ada Lovelace 99 SM(90), // Hopper 100 SM(90a), // Hopper 101 SM(100), // Blackwell 102 SM(100a), // Blackwell 103 GFX(600), // gfx600 104 GFX(601), // gfx601 105 GFX(602), // gfx602 106 GFX(700), // gfx700 107 GFX(701), // gfx701 108 GFX(702), // gfx702 109 GFX(703), // gfx703 110 GFX(704), // gfx704 111 GFX(705), // gfx705 112 GFX(801), // gfx801 113 GFX(802), // gfx802 114 GFX(803), // gfx803 115 GFX(805), // gfx805 116 GFX(810), // gfx810 117 {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"}, 118 GFX(900), // gfx900 119 GFX(902), // gfx902 120 GFX(904), // gfx903 121 GFX(906), // gfx906 122 GFX(908), // gfx908 123 GFX(909), // gfx909 124 GFX(90a), // gfx90a 125 GFX(90c), // gfx90c 126 {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, 127 GFX(940), // gfx940 128 GFX(941), // gfx941 129 GFX(942), // gfx942 130 GFX(950), // gfx950 131 {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, 132 GFX(1010), // gfx1010 133 GFX(1011), // gfx1011 134 GFX(1012), // gfx1012 135 GFX(1013), // gfx1013 136 {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"}, 137 GFX(1030), // gfx1030 138 GFX(1031), // gfx1031 139 GFX(1032), // gfx1032 140 GFX(1033), // gfx1033 141 GFX(1034), // gfx1034 142 GFX(1035), // gfx1035 143 GFX(1036), // gfx1036 144 {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"}, 145 GFX(1100), // gfx1100 146 GFX(1101), // gfx1101 147 GFX(1102), // gfx1102 148 GFX(1103), // gfx1103 149 GFX(1150), // gfx1150 150 GFX(1151), // gfx1151 151 GFX(1152), // gfx1152 152 GFX(1153), // gfx1153 153 {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"}, 154 GFX(1200), // gfx1200 155 GFX(1201), // gfx1201 156 {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, 157 {OffloadArch::Generic, "generic", ""}, 158 // clang-format on 159 }; 160 #undef SM 161 #undef SM2 162 #undef GFX 163 164 const char *OffloadArchToString(OffloadArch A) { 165 auto result = std::find_if( 166 std::begin(arch_names), std::end(arch_names), 167 [A](const OffloadArchToStringMap &map) { return A == map.arch; }); 168 if (result == std::end(arch_names)) 169 return "unknown"; 170 return result->arch_name; 171 } 172 173 const char *OffloadArchToVirtualArchString(OffloadArch A) { 174 auto result = std::find_if( 175 std::begin(arch_names), std::end(arch_names), 176 [A](const OffloadArchToStringMap &map) { return A == map.arch; }); 177 if (result == std::end(arch_names)) 178 return "unknown"; 179 return result->virtual_arch_name; 180 } 181 182 OffloadArch StringToOffloadArch(llvm::StringRef S) { 183 auto result = std::find_if( 184 std::begin(arch_names), std::end(arch_names), 185 [S](const OffloadArchToStringMap &map) { return S == map.arch_name; }); 186 if (result == std::end(arch_names)) 187 return OffloadArch::UNKNOWN; 188 return result->arch; 189 } 190 191 CudaVersion MinVersionForOffloadArch(OffloadArch A) { 192 if (A == OffloadArch::UNKNOWN) 193 return CudaVersion::UNKNOWN; 194 195 // AMD GPUs do not depend on CUDA versions. 196 if (IsAMDOffloadArch(A)) 197 return CudaVersion::CUDA_70; 198 199 switch (A) { 200 case OffloadArch::SM_20: 201 case OffloadArch::SM_21: 202 case OffloadArch::SM_30: 203 case OffloadArch::SM_32_: 204 case OffloadArch::SM_35: 205 case OffloadArch::SM_37: 206 case OffloadArch::SM_50: 207 case OffloadArch::SM_52: 208 case OffloadArch::SM_53: 209 return CudaVersion::CUDA_70; 210 case OffloadArch::SM_60: 211 case OffloadArch::SM_61: 212 case OffloadArch::SM_62: 213 return CudaVersion::CUDA_80; 214 case OffloadArch::SM_70: 215 return CudaVersion::CUDA_90; 216 case OffloadArch::SM_72: 217 return CudaVersion::CUDA_91; 218 case OffloadArch::SM_75: 219 return CudaVersion::CUDA_100; 220 case OffloadArch::SM_80: 221 return CudaVersion::CUDA_110; 222 case OffloadArch::SM_86: 223 return CudaVersion::CUDA_111; 224 case OffloadArch::SM_87: 225 return CudaVersion::CUDA_114; 226 case OffloadArch::SM_89: 227 case OffloadArch::SM_90: 228 return CudaVersion::CUDA_118; 229 case OffloadArch::SM_90a: 230 return CudaVersion::CUDA_120; 231 case OffloadArch::SM_100: 232 case OffloadArch::SM_100a: 233 return CudaVersion::CUDA_128; 234 default: 235 llvm_unreachable("invalid enum"); 236 } 237 } 238 239 CudaVersion MaxVersionForOffloadArch(OffloadArch A) { 240 // AMD GPUs do not depend on CUDA versions. 241 if (IsAMDOffloadArch(A)) 242 return CudaVersion::NEW; 243 244 switch (A) { 245 case OffloadArch::UNKNOWN: 246 return CudaVersion::UNKNOWN; 247 case OffloadArch::SM_20: 248 case OffloadArch::SM_21: 249 return CudaVersion::CUDA_80; 250 case OffloadArch::SM_30: 251 case OffloadArch::SM_32_: 252 return CudaVersion::CUDA_102; 253 case OffloadArch::SM_35: 254 case OffloadArch::SM_37: 255 return CudaVersion::CUDA_118; 256 default: 257 return CudaVersion::NEW; 258 } 259 } 260 261 bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) { 262 return CudaFeatureEnabled(ToCudaVersion(Version), Feature); 263 } 264 265 bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) { 266 switch (Feature) { 267 case CudaFeature::CUDA_USES_NEW_LAUNCH: 268 return Version >= CudaVersion::CUDA_92; 269 case CudaFeature::CUDA_USES_FATBIN_REGISTER_END: 270 return Version >= CudaVersion::CUDA_101; 271 } 272 llvm_unreachable("Unknown CUDA feature."); 273 } 274 } // namespace clang 275