1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the IdentifierInfo, IdentifierVisitor, and 10 // IdentifierTable interfaces. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Basic/IdentifierTable.h" 15 #include "clang/Basic/CharInfo.h" 16 #include "clang/Basic/DiagnosticLex.h" 17 #include "clang/Basic/LangOptions.h" 18 #include "clang/Basic/OperatorKinds.h" 19 #include "clang/Basic/Specifiers.h" 20 #include "clang/Basic/TargetBuiltins.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/DenseMapInfo.h" 23 #include "llvm/ADT/FoldingSet.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringMap.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Support/Allocator.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <cassert> 31 #include <cstdio> 32 #include <cstring> 33 #include <string> 34 35 using namespace clang; 36 37 // A check to make sure the ObjCOrBuiltinID has sufficient room to store the 38 // largest possible target/aux-target combination. If we exceed this, we likely 39 // need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h. 40 static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)), 41 "Insufficient ObjCOrBuiltinID Bits"); 42 43 //===----------------------------------------------------------------------===// 44 // IdentifierTable Implementation 45 //===----------------------------------------------------------------------===// 46 47 IdentifierIterator::~IdentifierIterator() = default; 48 49 IdentifierInfoLookup::~IdentifierInfoLookup() = default; 50 51 namespace { 52 53 /// A simple identifier lookup iterator that represents an 54 /// empty sequence of identifiers. 55 class EmptyLookupIterator : public IdentifierIterator 56 { 57 public: 58 StringRef Next() override { return StringRef(); } 59 }; 60 61 } // namespace 62 63 IdentifierIterator *IdentifierInfoLookup::getIdentifiers() { 64 return new EmptyLookupIterator(); 65 } 66 67 IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup) 68 : HashTable(8192), // Start with space for 8K identifiers. 69 ExternalLookup(ExternalLookup) {} 70 71 IdentifierTable::IdentifierTable(const LangOptions &LangOpts, 72 IdentifierInfoLookup *ExternalLookup) 73 : IdentifierTable(ExternalLookup) { 74 // Populate the identifier table with info about keywords for the current 75 // language. 76 AddKeywords(LangOpts); 77 } 78 79 //===----------------------------------------------------------------------===// 80 // Language Keyword Implementation 81 //===----------------------------------------------------------------------===// 82 83 // Constants for TokenKinds.def 84 namespace { 85 86 enum TokenKey : unsigned { 87 KEYC99 = 0x1, 88 KEYCXX = 0x2, 89 KEYCXX11 = 0x4, 90 KEYGNU = 0x8, 91 KEYMS = 0x10, 92 BOOLSUPPORT = 0x20, 93 KEYALTIVEC = 0x40, 94 KEYNOCXX = 0x80, 95 KEYBORLAND = 0x100, 96 KEYOPENCLC = 0x200, 97 KEYC2X = 0x400, 98 KEYNOMS18 = 0x800, 99 KEYNOOPENCL = 0x1000, 100 WCHARSUPPORT = 0x2000, 101 HALFSUPPORT = 0x4000, 102 CHAR8SUPPORT = 0x8000, 103 KEYOBJC = 0x10000, 104 KEYZVECTOR = 0x20000, 105 KEYCOROUTINES = 0x40000, 106 KEYMODULES = 0x80000, 107 KEYCXX20 = 0x100000, 108 KEYOPENCLCXX = 0x200000, 109 KEYMSCOMPAT = 0x400000, 110 KEYSYCL = 0x800000, 111 KEYCUDA = 0x1000000, 112 KEYHLSL = 0x2000000, 113 KEYMAX = KEYHLSL, // The maximum key 114 KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20, 115 KEYALL = (KEYMAX | (KEYMAX-1)) & ~KEYNOMS18 & 116 ~KEYNOOPENCL // KEYNOMS18 and KEYNOOPENCL are used to exclude. 117 }; 118 119 /// How a keyword is treated in the selected standard. This enum is ordered 120 /// intentionally so that the value that 'wins' is the most 'permissive'. 121 enum KeywordStatus { 122 KS_Unknown, // Not yet calculated. Used when figuring out the status. 123 KS_Disabled, // Disabled 124 KS_Future, // Is a keyword in future standard 125 KS_Extension, // Is an extension 126 KS_Enabled, // Enabled 127 }; 128 129 } // namespace 130 131 // This works on a single TokenKey flag and checks the LangOpts to get the 132 // KeywordStatus based exclusively on this flag, so that it can be merged in 133 // getKeywordStatus. Most should be enabled/disabled, but some might imply 134 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to 135 // be disabled, and the calling function makes it 'disabled' if no other flag 136 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags. 137 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts, 138 TokenKey Flag) { 139 // Flag is a single bit version of TokenKey (that is, not 140 // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function. 141 assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?"); 142 143 switch (Flag) { 144 case KEYC99: 145 if (LangOpts.C99) 146 return KS_Enabled; 147 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 148 case KEYC2X: 149 if (LangOpts.C2x) 150 return KS_Enabled; 151 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 152 case KEYCXX: 153 return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown; 154 case KEYCXX11: 155 if (LangOpts.CPlusPlus11) 156 return KS_Enabled; 157 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 158 case KEYCXX20: 159 if (LangOpts.CPlusPlus20) 160 return KS_Enabled; 161 return LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 162 case KEYGNU: 163 return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown; 164 case KEYMS: 165 return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown; 166 case BOOLSUPPORT: 167 if (LangOpts.Bool) return KS_Enabled; 168 return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown; 169 case KEYALTIVEC: 170 return LangOpts.AltiVec ? KS_Enabled : KS_Unknown; 171 case KEYBORLAND: 172 return LangOpts.Borland ? KS_Extension : KS_Unknown; 173 case KEYOPENCLC: 174 return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled 175 : KS_Unknown; 176 case WCHARSUPPORT: 177 return LangOpts.WChar ? KS_Enabled : KS_Unknown; 178 case HALFSUPPORT: 179 return LangOpts.Half ? KS_Enabled : KS_Unknown; 180 case CHAR8SUPPORT: 181 if (LangOpts.Char8) return KS_Enabled; 182 if (LangOpts.CPlusPlus20) return KS_Unknown; 183 if (LangOpts.CPlusPlus) return KS_Future; 184 return KS_Unknown; 185 case KEYOBJC: 186 // We treat bridge casts as objective-C keywords so we can warn on them 187 // in non-arc mode. 188 return LangOpts.ObjC ? KS_Enabled : KS_Unknown; 189 case KEYZVECTOR: 190 return LangOpts.ZVector ? KS_Enabled : KS_Unknown; 191 case KEYCOROUTINES: 192 return LangOpts.Coroutines ? KS_Enabled : KS_Unknown; 193 case KEYMODULES: 194 return KS_Unknown; 195 case KEYOPENCLCXX: 196 return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown; 197 case KEYMSCOMPAT: 198 return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown; 199 case KEYSYCL: 200 return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown; 201 case KEYCUDA: 202 return LangOpts.CUDA ? KS_Enabled : KS_Unknown; 203 case KEYHLSL: 204 return LangOpts.HLSL ? KS_Enabled : KS_Unknown; 205 case KEYNOCXX: 206 // This is enabled in all non-C++ modes, but might be enabled for other 207 // reasons as well. 208 return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled; 209 case KEYNOOPENCL: 210 // The disable behavior for this is handled in getKeywordStatus. 211 return KS_Unknown; 212 case KEYNOMS18: 213 // The disable behavior for this is handled in getKeywordStatus. 214 return KS_Unknown; 215 default: 216 llvm_unreachable("Unknown KeywordStatus flag"); 217 } 218 } 219 220 /// Translates flags as specified in TokenKinds.def into keyword status 221 /// in the given language standard. 222 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, 223 unsigned Flags) { 224 // KEYALL means always enabled, so special case this one. 225 if (Flags == KEYALL) return KS_Enabled; 226 // These are tests that need to 'always win', as they are special in that they 227 // disable based on certain conditions. 228 if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled; 229 if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) && 230 !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015)) 231 return KS_Disabled; 232 233 KeywordStatus CurStatus = KS_Unknown; 234 235 while (Flags != 0) { 236 unsigned CurFlag = Flags & ~(Flags - 1); 237 Flags = Flags & ~CurFlag; 238 CurStatus = std::max( 239 CurStatus, 240 getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag))); 241 } 242 243 if (CurStatus == KS_Unknown) 244 return KS_Disabled; 245 return CurStatus; 246 } 247 248 /// AddKeyword - This method is used to associate a token ID with specific 249 /// identifiers because they are language keywords. This causes the lexer to 250 /// automatically map matching identifiers to specialized token codes. 251 static void AddKeyword(StringRef Keyword, 252 tok::TokenKind TokenCode, unsigned Flags, 253 const LangOptions &LangOpts, IdentifierTable &Table) { 254 KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags); 255 256 // Don't add this keyword if disabled in this language. 257 if (AddResult == KS_Disabled) return; 258 259 IdentifierInfo &Info = 260 Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode); 261 Info.setIsExtensionToken(AddResult == KS_Extension); 262 Info.setIsFutureCompatKeyword(AddResult == KS_Future); 263 } 264 265 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative 266 /// representations. 267 static void AddCXXOperatorKeyword(StringRef Keyword, 268 tok::TokenKind TokenCode, 269 IdentifierTable &Table) { 270 IdentifierInfo &Info = Table.get(Keyword, TokenCode); 271 Info.setIsCPlusPlusOperatorKeyword(); 272 } 273 274 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector" 275 /// or "property". 276 static void AddObjCKeyword(StringRef Name, 277 tok::ObjCKeywordKind ObjCID, 278 IdentifierTable &Table) { 279 Table.get(Name).setObjCKeywordID(ObjCID); 280 } 281 282 static void AddInterestingIdentifier(StringRef Name, 283 tok::InterestingIdentifierKind BTID, 284 IdentifierTable &Table) { 285 // Don't add 'not_interesting' identifier. 286 if (BTID != tok::not_interesting) { 287 IdentifierInfo &Info = Table.get(Name, tok::identifier); 288 Info.setInterestingIdentifierID(BTID); 289 } 290 } 291 292 /// AddKeywords - Add all keywords to the symbol table. 293 /// 294 void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { 295 // Add keywords and tokens for the current language. 296 #define KEYWORD(NAME, FLAGS) \ 297 AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \ 298 FLAGS, LangOpts, *this); 299 #define ALIAS(NAME, TOK, FLAGS) \ 300 AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \ 301 FLAGS, LangOpts, *this); 302 #define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ 303 if (LangOpts.CXXOperatorNames) \ 304 AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); 305 #define OBJC_AT_KEYWORD(NAME) \ 306 if (LangOpts.ObjC) \ 307 AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this); 308 #define INTERESTING_IDENTIFIER(NAME) \ 309 AddInterestingIdentifier(StringRef(#NAME), tok::NAME, *this); 310 311 #define TESTING_KEYWORD(NAME, FLAGS) 312 #include "clang/Basic/TokenKinds.def" 313 314 if (LangOpts.ParseUnknownAnytype) 315 AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL, 316 LangOpts, *this); 317 318 if (LangOpts.DeclSpecKeyword) 319 AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this); 320 321 if (LangOpts.IEEE128) 322 AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); 323 324 // Add the 'import' contextual keyword. 325 get("import").setModulesImport(true); 326 } 327 328 /// Checks if the specified token kind represents a keyword in the 329 /// specified language. 330 /// \returns Status of the keyword in the language. 331 static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts, 332 tok::TokenKind K) { 333 switch (K) { 334 #define KEYWORD(NAME, FLAGS) \ 335 case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS); 336 #include "clang/Basic/TokenKinds.def" 337 default: return KS_Disabled; 338 } 339 } 340 341 /// Returns true if the identifier represents a keyword in the 342 /// specified language. 343 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const { 344 switch (getTokenKwStatus(LangOpts, getTokenID())) { 345 case KS_Enabled: 346 case KS_Extension: 347 return true; 348 default: 349 return false; 350 } 351 } 352 353 /// Returns true if the identifier represents a C++ keyword in the 354 /// specified language. 355 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const { 356 if (!LangOpts.CPlusPlus || !isKeyword(LangOpts)) 357 return false; 358 // This is a C++ keyword if this identifier is not a keyword when checked 359 // using LangOptions without C++ support. 360 LangOptions LangOptsNoCPP = LangOpts; 361 LangOptsNoCPP.CPlusPlus = false; 362 LangOptsNoCPP.CPlusPlus11 = false; 363 LangOptsNoCPP.CPlusPlus20 = false; 364 return !isKeyword(LangOptsNoCPP); 365 } 366 367 ReservedIdentifierStatus 368 IdentifierInfo::isReserved(const LangOptions &LangOpts) const { 369 StringRef Name = getName(); 370 371 // '_' is a reserved identifier, but its use is so common (e.g. to store 372 // ignored values) that we don't warn on it. 373 if (Name.size() <= 1) 374 return ReservedIdentifierStatus::NotReserved; 375 376 // [lex.name] p3 377 if (Name[0] == '_') { 378 379 // Each name that begins with an underscore followed by an uppercase letter 380 // or another underscore is reserved. 381 if (Name[1] == '_') 382 return ReservedIdentifierStatus::StartsWithDoubleUnderscore; 383 384 if ('A' <= Name[1] && Name[1] <= 'Z') 385 return ReservedIdentifierStatus:: 386 StartsWithUnderscoreFollowedByCapitalLetter; 387 388 // This is a bit misleading: it actually means it's only reserved if we're 389 // at global scope because it starts with an underscore. 390 return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope; 391 } 392 393 // Each name that contains a double underscore (__) is reserved. 394 if (LangOpts.CPlusPlus && Name.contains("__")) 395 return ReservedIdentifierStatus::ContainsDoubleUnderscore; 396 397 return ReservedIdentifierStatus::NotReserved; 398 } 399 400 StringRef IdentifierInfo::deuglifiedName() const { 401 StringRef Name = getName(); 402 if (Name.size() >= 2 && Name.front() == '_' && 403 (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z'))) 404 return Name.ltrim('_'); 405 return Name; 406 } 407 408 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { 409 // We use a perfect hash function here involving the length of the keyword, 410 // the first and third character. For preprocessor ID's there are no 411 // collisions (if there were, the switch below would complain about duplicate 412 // case values). Note that this depends on 'if' being null terminated. 413 414 #define HASH(LEN, FIRST, THIRD) \ 415 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) 416 #define CASE(LEN, FIRST, THIRD, NAME) \ 417 case HASH(LEN, FIRST, THIRD): \ 418 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME 419 420 unsigned Len = getLength(); 421 if (Len < 2) return tok::pp_not_keyword; 422 const char *Name = getNameStart(); 423 switch (HASH(Len, Name[0], Name[2])) { 424 default: return tok::pp_not_keyword; 425 CASE( 2, 'i', '\0', if); 426 CASE( 4, 'e', 'i', elif); 427 CASE( 4, 'e', 's', else); 428 CASE( 4, 'l', 'n', line); 429 CASE( 4, 's', 'c', sccs); 430 CASE( 5, 'e', 'd', endif); 431 CASE( 5, 'e', 'r', error); 432 CASE( 5, 'i', 'e', ident); 433 CASE( 5, 'i', 'd', ifdef); 434 CASE( 5, 'u', 'd', undef); 435 436 CASE( 6, 'a', 's', assert); 437 CASE( 6, 'd', 'f', define); 438 CASE( 6, 'i', 'n', ifndef); 439 CASE( 6, 'i', 'p', import); 440 CASE( 6, 'p', 'a', pragma); 441 442 CASE( 7, 'd', 'f', defined); 443 CASE( 7, 'e', 'i', elifdef); 444 CASE( 7, 'i', 'c', include); 445 CASE( 7, 'w', 'r', warning); 446 447 CASE( 8, 'e', 'i', elifndef); 448 CASE( 8, 'u', 'a', unassert); 449 CASE(12, 'i', 'c', include_next); 450 451 CASE(14, '_', 'p', __public_macro); 452 453 CASE(15, '_', 'p', __private_macro); 454 455 CASE(16, '_', 'i', __include_macros); 456 #undef CASE 457 #undef HASH 458 } 459 } 460 461 //===----------------------------------------------------------------------===// 462 // Stats Implementation 463 //===----------------------------------------------------------------------===// 464 465 /// PrintStats - Print statistics about how well the identifier table is doing 466 /// at hashing identifiers. 467 void IdentifierTable::PrintStats() const { 468 unsigned NumBuckets = HashTable.getNumBuckets(); 469 unsigned NumIdentifiers = HashTable.getNumItems(); 470 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers; 471 unsigned AverageIdentifierSize = 0; 472 unsigned MaxIdentifierLength = 0; 473 474 // TODO: Figure out maximum times an identifier had to probe for -stats. 475 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator 476 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) { 477 unsigned IdLen = I->getKeyLength(); 478 AverageIdentifierSize += IdLen; 479 if (MaxIdentifierLength < IdLen) 480 MaxIdentifierLength = IdLen; 481 } 482 483 fprintf(stderr, "\n*** Identifier Table Stats:\n"); 484 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers); 485 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets); 486 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n", 487 NumIdentifiers/(double)NumBuckets); 488 fprintf(stderr, "Ave identifier length: %f\n", 489 (AverageIdentifierSize/(double)NumIdentifiers)); 490 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength); 491 492 // Compute statistics about the memory allocated for identifiers. 493 HashTable.getAllocator().PrintStats(); 494 } 495 496 //===----------------------------------------------------------------------===// 497 // SelectorTable Implementation 498 //===----------------------------------------------------------------------===// 499 500 unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) { 501 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr()); 502 } 503 504 namespace clang { 505 506 /// One of these variable length records is kept for each 507 /// selector containing more than one keyword. We use a folding set 508 /// to unique aggregate names (keyword selectors in ObjC parlance). Access to 509 /// this class is provided strictly through Selector. 510 class alignas(IdentifierInfoAlignment) MultiKeywordSelector 511 : public detail::DeclarationNameExtra, 512 public llvm::FoldingSetNode { 513 MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {} 514 515 public: 516 // Constructor for keyword selectors. 517 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) 518 : DeclarationNameExtra(nKeys) { 519 assert((nKeys > 1) && "not a multi-keyword selector"); 520 521 // Fill in the trailing keyword array. 522 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1); 523 for (unsigned i = 0; i != nKeys; ++i) 524 KeyInfo[i] = IIV[i]; 525 } 526 527 // getName - Derive the full selector name and return it. 528 std::string getName() const; 529 530 using DeclarationNameExtra::getNumArgs; 531 532 using keyword_iterator = IdentifierInfo *const *; 533 534 keyword_iterator keyword_begin() const { 535 return reinterpret_cast<keyword_iterator>(this + 1); 536 } 537 538 keyword_iterator keyword_end() const { 539 return keyword_begin() + getNumArgs(); 540 } 541 542 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { 543 assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index"); 544 return keyword_begin()[i]; 545 } 546 547 static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys, 548 unsigned NumArgs) { 549 ID.AddInteger(NumArgs); 550 for (unsigned i = 0; i != NumArgs; ++i) 551 ID.AddPointer(ArgTys[i]); 552 } 553 554 void Profile(llvm::FoldingSetNodeID &ID) { 555 Profile(ID, keyword_begin(), getNumArgs()); 556 } 557 }; 558 559 } // namespace clang. 560 561 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const { 562 assert(!Names.empty() && "must have >= 1 selector slots"); 563 if (getNumArgs() != Names.size()) 564 return false; 565 for (unsigned I = 0, E = Names.size(); I != E; ++I) { 566 if (getNameForSlot(I) != Names[I]) 567 return false; 568 } 569 return true; 570 } 571 572 bool Selector::isUnarySelector(StringRef Name) const { 573 return isUnarySelector() && getNameForSlot(0) == Name; 574 } 575 576 unsigned Selector::getNumArgs() const { 577 unsigned IIF = getIdentifierInfoFlag(); 578 if (IIF <= ZeroArg) 579 return 0; 580 if (IIF == OneArg) 581 return 1; 582 // We point to a MultiKeywordSelector. 583 MultiKeywordSelector *SI = getMultiKeywordSelector(); 584 return SI->getNumArgs(); 585 } 586 587 IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { 588 if (getIdentifierInfoFlag() < MultiArg) { 589 assert(argIndex == 0 && "illegal keyword index"); 590 return getAsIdentifierInfo(); 591 } 592 593 // We point to a MultiKeywordSelector. 594 MultiKeywordSelector *SI = getMultiKeywordSelector(); 595 return SI->getIdentifierInfoForSlot(argIndex); 596 } 597 598 StringRef Selector::getNameForSlot(unsigned int argIndex) const { 599 IdentifierInfo *II = getIdentifierInfoForSlot(argIndex); 600 return II ? II->getName() : StringRef(); 601 } 602 603 std::string MultiKeywordSelector::getName() const { 604 SmallString<256> Str; 605 llvm::raw_svector_ostream OS(Str); 606 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) { 607 if (*I) 608 OS << (*I)->getName(); 609 OS << ':'; 610 } 611 612 return std::string(OS.str()); 613 } 614 615 std::string Selector::getAsString() const { 616 if (InfoPtr == 0) 617 return "<null selector>"; 618 619 if (getIdentifierInfoFlag() < MultiArg) { 620 IdentifierInfo *II = getAsIdentifierInfo(); 621 622 if (getNumArgs() == 0) { 623 assert(II && "If the number of arguments is 0 then II is guaranteed to " 624 "not be null."); 625 return std::string(II->getName()); 626 } 627 628 if (!II) 629 return ":"; 630 631 return II->getName().str() + ":"; 632 } 633 634 // We have a multiple keyword selector. 635 return getMultiKeywordSelector()->getName(); 636 } 637 638 void Selector::print(llvm::raw_ostream &OS) const { 639 OS << getAsString(); 640 } 641 642 LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); } 643 644 /// Interpreting the given string using the normal CamelCase 645 /// conventions, determine whether the given string starts with the 646 /// given "word", which is assumed to end in a lowercase letter. 647 static bool startsWithWord(StringRef name, StringRef word) { 648 if (name.size() < word.size()) return false; 649 return ((name.size() == word.size() || !isLowercase(name[word.size()])) && 650 name.startswith(word)); 651 } 652 653 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) { 654 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); 655 if (!first) return OMF_None; 656 657 StringRef name = first->getName(); 658 if (sel.isUnarySelector()) { 659 if (name == "autorelease") return OMF_autorelease; 660 if (name == "dealloc") return OMF_dealloc; 661 if (name == "finalize") return OMF_finalize; 662 if (name == "release") return OMF_release; 663 if (name == "retain") return OMF_retain; 664 if (name == "retainCount") return OMF_retainCount; 665 if (name == "self") return OMF_self; 666 if (name == "initialize") return OMF_initialize; 667 } 668 669 if (name == "performSelector" || name == "performSelectorInBackground" || 670 name == "performSelectorOnMainThread") 671 return OMF_performSelector; 672 673 // The other method families may begin with a prefix of underscores. 674 while (!name.empty() && name.front() == '_') 675 name = name.substr(1); 676 677 if (name.empty()) return OMF_None; 678 switch (name.front()) { 679 case 'a': 680 if (startsWithWord(name, "alloc")) return OMF_alloc; 681 break; 682 case 'c': 683 if (startsWithWord(name, "copy")) return OMF_copy; 684 break; 685 case 'i': 686 if (startsWithWord(name, "init")) return OMF_init; 687 break; 688 case 'm': 689 if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy; 690 break; 691 case 'n': 692 if (startsWithWord(name, "new")) return OMF_new; 693 break; 694 default: 695 break; 696 } 697 698 return OMF_None; 699 } 700 701 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) { 702 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); 703 if (!first) return OIT_None; 704 705 StringRef name = first->getName(); 706 707 if (name.empty()) return OIT_None; 708 switch (name.front()) { 709 case 'a': 710 if (startsWithWord(name, "array")) return OIT_Array; 711 break; 712 case 'd': 713 if (startsWithWord(name, "default")) return OIT_ReturnsSelf; 714 if (startsWithWord(name, "dictionary")) return OIT_Dictionary; 715 break; 716 case 's': 717 if (startsWithWord(name, "shared")) return OIT_ReturnsSelf; 718 if (startsWithWord(name, "standard")) return OIT_Singleton; 719 break; 720 case 'i': 721 if (startsWithWord(name, "init")) return OIT_Init; 722 break; 723 default: 724 break; 725 } 726 return OIT_None; 727 } 728 729 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) { 730 IdentifierInfo *first = sel.getIdentifierInfoForSlot(0); 731 if (!first) return SFF_None; 732 733 StringRef name = first->getName(); 734 735 switch (name.front()) { 736 case 'a': 737 if (name == "appendFormat") return SFF_NSString; 738 break; 739 740 case 'i': 741 if (name == "initWithFormat") return SFF_NSString; 742 break; 743 744 case 'l': 745 if (name == "localizedStringWithFormat") return SFF_NSString; 746 break; 747 748 case 's': 749 if (name == "stringByAppendingFormat" || 750 name == "stringWithFormat") return SFF_NSString; 751 break; 752 } 753 return SFF_None; 754 } 755 756 namespace { 757 758 struct SelectorTableImpl { 759 llvm::FoldingSet<MultiKeywordSelector> Table; 760 llvm::BumpPtrAllocator Allocator; 761 }; 762 763 } // namespace 764 765 static SelectorTableImpl &getSelectorTableImpl(void *P) { 766 return *static_cast<SelectorTableImpl*>(P); 767 } 768 769 SmallString<64> 770 SelectorTable::constructSetterName(StringRef Name) { 771 SmallString<64> SetterName("set"); 772 SetterName += Name; 773 SetterName[3] = toUppercase(SetterName[3]); 774 return SetterName; 775 } 776 777 Selector 778 SelectorTable::constructSetterSelector(IdentifierTable &Idents, 779 SelectorTable &SelTable, 780 const IdentifierInfo *Name) { 781 IdentifierInfo *SetterName = 782 &Idents.get(constructSetterName(Name->getName())); 783 return SelTable.getUnarySelector(SetterName); 784 } 785 786 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) { 787 StringRef Name = Sel.getNameForSlot(0); 788 assert(Name.startswith("set") && "invalid setter name"); 789 return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str(); 790 } 791 792 size_t SelectorTable::getTotalMemory() const { 793 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl); 794 return SelTabImpl.Allocator.getTotalMemory(); 795 } 796 797 Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) { 798 if (nKeys < 2) 799 return Selector(IIV[0], nKeys); 800 801 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl); 802 803 // Unique selector, to guarantee there is one per name. 804 llvm::FoldingSetNodeID ID; 805 MultiKeywordSelector::Profile(ID, IIV, nKeys); 806 807 void *InsertPos = nullptr; 808 if (MultiKeywordSelector *SI = 809 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos)) 810 return Selector(SI); 811 812 // MultiKeywordSelector objects are not allocated with new because they have a 813 // variable size array (for parameter types) at the end of them. 814 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *); 815 MultiKeywordSelector *SI = 816 (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate( 817 Size, alignof(MultiKeywordSelector)); 818 new (SI) MultiKeywordSelector(nKeys, IIV); 819 SelTabImpl.Table.InsertNode(SI, InsertPos); 820 return Selector(SI); 821 } 822 823 SelectorTable::SelectorTable() { 824 Impl = new SelectorTableImpl(); 825 } 826 827 SelectorTable::~SelectorTable() { 828 delete &getSelectorTableImpl(Impl); 829 } 830 831 const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) { 832 switch (Operator) { 833 case OO_None: 834 case NUM_OVERLOADED_OPERATORS: 835 return nullptr; 836 837 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \ 838 case OO_##Name: return Spelling; 839 #include "clang/Basic/OperatorKinds.def" 840 } 841 842 llvm_unreachable("Invalid OverloadedOperatorKind!"); 843 } 844 845 StringRef clang::getNullabilitySpelling(NullabilityKind kind, 846 bool isContextSensitive) { 847 switch (kind) { 848 case NullabilityKind::NonNull: 849 return isContextSensitive ? "nonnull" : "_Nonnull"; 850 851 case NullabilityKind::Nullable: 852 return isContextSensitive ? "nullable" : "_Nullable"; 853 854 case NullabilityKind::NullableResult: 855 assert(!isContextSensitive && 856 "_Nullable_result isn't supported as context-sensitive keyword"); 857 return "_Nullable_result"; 858 859 case NullabilityKind::Unspecified: 860 return isContextSensitive ? "null_unspecified" : "_Null_unspecified"; 861 } 862 llvm_unreachable("Unknown nullability kind."); 863 } 864 865 llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS, 866 NullabilityKind NK) { 867 switch (NK) { 868 case NullabilityKind::NonNull: 869 return OS << "NonNull"; 870 case NullabilityKind::Nullable: 871 return OS << "Nullable"; 872 case NullabilityKind::NullableResult: 873 return OS << "NullableResult"; 874 case NullabilityKind::Unspecified: 875 return OS << "Unspecified"; 876 } 877 llvm_unreachable("Unknown nullability kind."); 878 } 879 880 diag::kind 881 IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II, 882 const LangOptions &LangOpts) { 883 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed"); 884 885 unsigned Flags = llvm::StringSwitch<unsigned>(II.getName()) 886 #define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS) 887 #include "clang/Basic/TokenKinds.def" 888 #undef KEYWORD 889 ; 890 891 if (LangOpts.CPlusPlus) { 892 if ((Flags & KEYCXX11) == KEYCXX11) 893 return diag::warn_cxx11_keyword; 894 895 // char8_t is not modeled as a CXX20_KEYWORD because it's not 896 // unconditionally enabled in C++20 mode. (It can be disabled 897 // by -fno-char8_t.) 898 if (((Flags & KEYCXX20) == KEYCXX20) || 899 ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT)) 900 return diag::warn_cxx20_keyword; 901 } else { 902 if ((Flags & KEYC99) == KEYC99) 903 return diag::warn_c99_keyword; 904 if ((Flags & KEYC2X) == KEYC2X) 905 return diag::warn_c2x_keyword; 906 } 907 908 llvm_unreachable( 909 "Keyword not known to come from a newer Standard or proposed Standard"); 910 } 911