1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and 11 /// clang::Selector interfaces. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 17 18 #include "clang/Basic/Builtins.h" 19 #include "clang/Basic/DiagnosticIDs.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/DenseMapInfo.h" 23 #include "llvm/ADT/FoldingSet.h" 24 #include "llvm/ADT/PointerIntPair.h" 25 #include "llvm/ADT/PointerUnion.h" 26 #include "llvm/ADT/SmallString.h" 27 #include "llvm/ADT/StringMap.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/Support/Allocator.h" 30 #include "llvm/Support/PointerLikeTypeTraits.h" 31 #include "llvm/Support/type_traits.h" 32 #include <cassert> 33 #include <cstddef> 34 #include <cstdint> 35 #include <cstring> 36 #include <string> 37 #include <utility> 38 39 namespace clang { 40 41 class DeclarationName; 42 class DeclarationNameTable; 43 class IdentifierInfo; 44 class LangOptions; 45 class MultiKeywordSelector; 46 class SourceLocation; 47 48 enum class ReservedIdentifierStatus { 49 NotReserved = 0, 50 StartsWithUnderscoreAtGlobalScope, 51 StartsWithUnderscoreAndIsExternC, 52 StartsWithDoubleUnderscore, 53 StartsWithUnderscoreFollowedByCapitalLetter, 54 ContainsDoubleUnderscore, 55 }; 56 57 enum class ReservedLiteralSuffixIdStatus { 58 NotReserved = 0, 59 NotStartsWithUnderscore, 60 ContainsDoubleUnderscore, 61 }; 62 63 /// Determine whether an identifier is reserved for use as a name at global 64 /// scope. Such identifiers might be implementation-specific global functions 65 /// or variables. 66 inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) { 67 return Status != ReservedIdentifierStatus::NotReserved; 68 } 69 70 /// Determine whether an identifier is reserved in all contexts. Such 71 /// identifiers might be implementation-specific keywords or macros, for 72 /// example. 73 inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) { 74 return Status != ReservedIdentifierStatus::NotReserved && 75 Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope && 76 Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC; 77 } 78 79 /// A simple pair of identifier info and location. 80 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>; 81 82 /// IdentifierInfo and other related classes are aligned to 83 /// 8 bytes so that DeclarationName can use the lower 3 bits 84 /// of a pointer to one of these classes. 85 enum { IdentifierInfoAlignment = 8 }; 86 87 static constexpr int InterestingIdentifierBits = 16; 88 89 /// The "layout" of InterestingIdentifier is: 90 /// - ObjCKeywordKind enumerators 91 /// - NotableIdentifierKind enumerators 92 /// - Builtin::ID enumerators 93 /// - NotInterestingIdentifier 94 enum class InterestingIdentifier { 95 #define OBJC_AT_KEYWORD(X) objc_##X, 96 #include "clang/Basic/TokenKinds.def" 97 NUM_OBJC_KEYWORDS, 98 99 #define NOTABLE_IDENTIFIER(X) X, 100 #include "clang/Basic/TokenKinds.def" 101 NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS, 102 103 NotBuiltin, 104 #define BUILTIN(ID, TYPE, ATTRS) BI##ID, 105 #include "clang/Basic/Builtins.inc" 106 FirstTSBuiltin, 107 108 NotInterestingIdentifier = 65534 109 }; 110 111 /// One of these records is kept for each identifier that 112 /// is lexed. This contains information about whether the token was \#define'd, 113 /// is a language keyword, or if it is a front-end token of some sort (e.g. a 114 /// variable or function name). The preprocessor keeps this information in a 115 /// set, and all tok::identifier tokens have a pointer to one of these. 116 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits. 117 class alignas(IdentifierInfoAlignment) IdentifierInfo { 118 friend class IdentifierTable; 119 120 // Front-end token ID or tok::identifier. 121 LLVM_PREFERRED_TYPE(tok::TokenKind) 122 unsigned TokenID : 9; 123 124 LLVM_PREFERRED_TYPE(InterestingIdentifier) 125 unsigned InterestingIdentifierID : InterestingIdentifierBits; 126 127 // True if there is a #define for this. 128 LLVM_PREFERRED_TYPE(bool) 129 unsigned HasMacro : 1; 130 131 // True if there was a #define for this. 132 LLVM_PREFERRED_TYPE(bool) 133 unsigned HadMacro : 1; 134 135 // True if the identifier is a language extension. 136 LLVM_PREFERRED_TYPE(bool) 137 unsigned IsExtension : 1; 138 139 // True if the identifier is a keyword in a newer or proposed Standard. 140 LLVM_PREFERRED_TYPE(bool) 141 unsigned IsFutureCompatKeyword : 1; 142 143 // True if the identifier is poisoned. 144 LLVM_PREFERRED_TYPE(bool) 145 unsigned IsPoisoned : 1; 146 147 // True if the identifier is a C++ operator keyword. 148 LLVM_PREFERRED_TYPE(bool) 149 unsigned IsCPPOperatorKeyword : 1; 150 151 // Internal bit set by the member function RecomputeNeedsHandleIdentifier. 152 // See comment about RecomputeNeedsHandleIdentifier for more info. 153 LLVM_PREFERRED_TYPE(bool) 154 unsigned NeedsHandleIdentifier : 1; 155 156 // True if the identifier was loaded (at least partially) from an AST file. 157 LLVM_PREFERRED_TYPE(bool) 158 unsigned IsFromAST : 1; 159 160 // True if the identifier has changed from the definition 161 // loaded from an AST file. 162 LLVM_PREFERRED_TYPE(bool) 163 unsigned ChangedAfterLoad : 1; 164 165 // True if the identifier's frontend information has changed from the 166 // definition loaded from an AST file. 167 LLVM_PREFERRED_TYPE(bool) 168 unsigned FEChangedAfterLoad : 1; 169 170 // True if revertTokenIDToIdentifier was called. 171 LLVM_PREFERRED_TYPE(bool) 172 unsigned RevertedTokenID : 1; 173 174 // True if there may be additional information about 175 // this identifier stored externally. 176 LLVM_PREFERRED_TYPE(bool) 177 unsigned OutOfDate : 1; 178 179 // True if this is the 'import' contextual keyword. 180 LLVM_PREFERRED_TYPE(bool) 181 unsigned IsModulesImport : 1; 182 183 // True if this is a mangled OpenMP variant name. 184 LLVM_PREFERRED_TYPE(bool) 185 unsigned IsMangledOpenMPVariantName : 1; 186 187 // True if this is a deprecated macro. 188 LLVM_PREFERRED_TYPE(bool) 189 unsigned IsDeprecatedMacro : 1; 190 191 // True if this macro is unsafe in headers. 192 LLVM_PREFERRED_TYPE(bool) 193 unsigned IsRestrictExpansion : 1; 194 195 // True if this macro is final. 196 LLVM_PREFERRED_TYPE(bool) 197 unsigned IsFinal : 1; 198 199 // 22 bits left in a 64-bit word. 200 201 // Managed by the language front-end. 202 void *FETokenInfo = nullptr; 203 204 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr; 205 206 IdentifierInfo() 207 : TokenID(tok::identifier), 208 InterestingIdentifierID(llvm::to_underlying( 209 InterestingIdentifier::NotInterestingIdentifier)), 210 HasMacro(false), HadMacro(false), IsExtension(false), 211 IsFutureCompatKeyword(false), IsPoisoned(false), 212 IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), 213 IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), 214 RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), 215 IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), 216 IsRestrictExpansion(false), IsFinal(false) {} 217 218 public: 219 IdentifierInfo(const IdentifierInfo &) = delete; 220 IdentifierInfo &operator=(const IdentifierInfo &) = delete; 221 IdentifierInfo(IdentifierInfo &&) = delete; 222 IdentifierInfo &operator=(IdentifierInfo &&) = delete; 223 224 /// Return true if this is the identifier for the specified string. 225 /// 226 /// This is intended to be used for string literals only: II->isStr("foo"). 227 template <std::size_t StrLen> 228 bool isStr(const char (&Str)[StrLen]) const { 229 return getLength() == StrLen-1 && 230 memcmp(getNameStart(), Str, StrLen-1) == 0; 231 } 232 233 /// Return true if this is the identifier for the specified StringRef. 234 bool isStr(llvm::StringRef Str) const { 235 llvm::StringRef ThisStr(getNameStart(), getLength()); 236 return ThisStr == Str; 237 } 238 239 /// Return the beginning of the actual null-terminated string for this 240 /// identifier. 241 const char *getNameStart() const { return Entry->getKeyData(); } 242 243 /// Efficiently return the length of this identifier info. 244 unsigned getLength() const { return Entry->getKeyLength(); } 245 246 /// Return the actual identifier string. 247 StringRef getName() const { 248 return StringRef(getNameStart(), getLength()); 249 } 250 251 /// Return true if this identifier is \#defined to some other value. 252 /// \note The current definition may be in a module and not currently visible. 253 bool hasMacroDefinition() const { 254 return HasMacro; 255 } 256 void setHasMacroDefinition(bool Val) { 257 if (HasMacro == Val) return; 258 259 HasMacro = Val; 260 if (Val) { 261 NeedsHandleIdentifier = true; 262 HadMacro = true; 263 } else { 264 // If this is a final macro, make the deprecation and header unsafe bits 265 // stick around after the undefinition so they apply to any redefinitions. 266 if (!IsFinal) { 267 // Because calling the setters of these calls recomputes, just set them 268 // manually to avoid recomputing a bunch of times. 269 IsDeprecatedMacro = false; 270 IsRestrictExpansion = false; 271 } 272 RecomputeNeedsHandleIdentifier(); 273 } 274 } 275 /// Returns true if this identifier was \#defined to some value at any 276 /// moment. In this case there should be an entry for the identifier in the 277 /// macro history table in Preprocessor. 278 bool hadMacroDefinition() const { 279 return HadMacro; 280 } 281 282 bool isDeprecatedMacro() const { return IsDeprecatedMacro; } 283 284 void setIsDeprecatedMacro(bool Val) { 285 if (IsDeprecatedMacro == Val) 286 return; 287 IsDeprecatedMacro = Val; 288 if (Val) 289 NeedsHandleIdentifier = true; 290 else 291 RecomputeNeedsHandleIdentifier(); 292 } 293 294 bool isRestrictExpansion() const { return IsRestrictExpansion; } 295 296 void setIsRestrictExpansion(bool Val) { 297 if (IsRestrictExpansion == Val) 298 return; 299 IsRestrictExpansion = Val; 300 if (Val) 301 NeedsHandleIdentifier = true; 302 else 303 RecomputeNeedsHandleIdentifier(); 304 } 305 306 bool isFinal() const { return IsFinal; } 307 308 void setIsFinal(bool Val) { IsFinal = Val; } 309 310 /// If this is a source-language token (e.g. 'for'), this API 311 /// can be used to cause the lexer to map identifiers to source-language 312 /// tokens. 313 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 314 315 /// True if revertTokenIDToIdentifier() was called. 316 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 317 318 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 319 /// compatibility. 320 /// 321 /// TokenID is normally read-only but there are 2 instances where we revert it 322 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 323 /// using this method so we can inform serialization about it. 324 void revertTokenIDToIdentifier() { 325 assert(TokenID != tok::identifier && "Already at tok::identifier"); 326 TokenID = tok::identifier; 327 RevertedTokenID = true; 328 } 329 void revertIdentifierToTokenID(tok::TokenKind TK) { 330 assert(TokenID == tok::identifier && "Should be at tok::identifier"); 331 TokenID = TK; 332 RevertedTokenID = false; 333 } 334 335 /// Return the preprocessor keyword ID for this identifier. 336 /// 337 /// For example, "define" will return tok::pp_define. 338 tok::PPKeywordKind getPPKeywordID() const; 339 340 /// Return the Objective-C keyword ID for the this identifier. 341 /// 342 /// For example, 'class' will return tok::objc_class if ObjC is enabled. 343 tok::ObjCKeywordKind getObjCKeywordID() const { 344 assert(0 == llvm::to_underlying(InterestingIdentifier::objc_not_keyword)); 345 auto Value = static_cast<InterestingIdentifier>(InterestingIdentifierID); 346 if (Value < InterestingIdentifier::NUM_OBJC_KEYWORDS) 347 return static_cast<tok::ObjCKeywordKind>(InterestingIdentifierID); 348 return tok::objc_not_keyword; 349 } 350 void setObjCKeywordID(tok::ObjCKeywordKind ID) { 351 assert(0 == llvm::to_underlying(InterestingIdentifier::objc_not_keyword)); 352 InterestingIdentifierID = ID; 353 assert(getObjCKeywordID() == ID && "ID too large for field!"); 354 } 355 356 /// Return a value indicating whether this is a builtin function. 357 unsigned getBuiltinID() const { 358 auto Value = static_cast<InterestingIdentifier>(InterestingIdentifierID); 359 if (Value > 360 InterestingIdentifier::NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS && 361 Value != InterestingIdentifier::NotInterestingIdentifier) { 362 auto FirstBuiltin = 363 llvm::to_underlying(InterestingIdentifier::NotBuiltin); 364 return static_cast<Builtin::ID>(InterestingIdentifierID - FirstBuiltin); 365 } 366 return Builtin::ID::NotBuiltin; 367 } 368 void setBuiltinID(unsigned ID) { 369 assert(ID != Builtin::ID::NotBuiltin); 370 auto FirstBuiltin = llvm::to_underlying(InterestingIdentifier::NotBuiltin); 371 InterestingIdentifierID = ID + FirstBuiltin; 372 assert(getBuiltinID() == ID && "ID too large for field!"); 373 } 374 void clearBuiltinID() { 375 InterestingIdentifierID = 376 llvm::to_underlying(InterestingIdentifier::NotInterestingIdentifier); 377 } 378 379 tok::NotableIdentifierKind getNotableIdentifierID() const { 380 auto Value = static_cast<InterestingIdentifier>(InterestingIdentifierID); 381 if (Value > InterestingIdentifier::NUM_OBJC_KEYWORDS && 382 Value < 383 InterestingIdentifier::NUM_OBJC_KEYWORDS_AND_NOTABLE_IDENTIFIERS) { 384 auto FirstNotableIdentifier = 385 1 + llvm::to_underlying(InterestingIdentifier::NUM_OBJC_KEYWORDS); 386 return static_cast<tok::NotableIdentifierKind>(InterestingIdentifierID - 387 FirstNotableIdentifier); 388 } 389 return tok::not_notable; 390 } 391 void setNotableIdentifierID(unsigned ID) { 392 assert(ID != tok::not_notable); 393 auto FirstNotableIdentifier = 394 1 + llvm::to_underlying(InterestingIdentifier::NUM_OBJC_KEYWORDS); 395 InterestingIdentifierID = ID + FirstNotableIdentifier; 396 assert(getNotableIdentifierID() == ID && "ID too large for field!"); 397 } 398 399 unsigned getObjCOrBuiltinID() const { return InterestingIdentifierID; } 400 void setObjCOrBuiltinID(unsigned ID) { InterestingIdentifierID = ID; } 401 402 /// get/setExtension - Initialize information about whether or not this 403 /// language token is an extension. This controls extension warnings, and is 404 /// only valid if a custom token ID is set. 405 bool isExtensionToken() const { return IsExtension; } 406 void setIsExtensionToken(bool Val) { 407 IsExtension = Val; 408 if (Val) 409 NeedsHandleIdentifier = true; 410 else 411 RecomputeNeedsHandleIdentifier(); 412 } 413 414 /// is/setIsFutureCompatKeyword - Initialize information about whether or not 415 /// this language token is a keyword in a newer or proposed Standard. This 416 /// controls compatibility warnings, and is only true when not parsing the 417 /// corresponding Standard. Once a compatibility problem has been diagnosed 418 /// with this keyword, the flag will be cleared. 419 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; } 420 void setIsFutureCompatKeyword(bool Val) { 421 IsFutureCompatKeyword = Val; 422 if (Val) 423 NeedsHandleIdentifier = true; 424 else 425 RecomputeNeedsHandleIdentifier(); 426 } 427 428 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 429 /// Preprocessor will emit an error every time this token is used. 430 void setIsPoisoned(bool Value = true) { 431 IsPoisoned = Value; 432 if (Value) 433 NeedsHandleIdentifier = true; 434 else 435 RecomputeNeedsHandleIdentifier(); 436 } 437 438 /// Return true if this token has been poisoned. 439 bool isPoisoned() const { return IsPoisoned; } 440 441 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 442 /// this identifier is a C++ alternate representation of an operator. 443 void setIsCPlusPlusOperatorKeyword(bool Val = true) { 444 IsCPPOperatorKeyword = Val; 445 } 446 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 447 448 /// Return true if this token is a keyword in the specified language. 449 bool isKeyword(const LangOptions &LangOpts) const; 450 451 /// Return true if this token is a C++ keyword in the specified 452 /// language. 453 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; 454 455 /// Get and set FETokenInfo. The language front-end is allowed to associate 456 /// arbitrary metadata with this token. 457 void *getFETokenInfo() const { return FETokenInfo; } 458 void setFETokenInfo(void *T) { FETokenInfo = T; } 459 460 /// Return true if the Preprocessor::HandleIdentifier must be called 461 /// on a token of this identifier. 462 /// 463 /// If this returns false, we know that HandleIdentifier will not affect 464 /// the token. 465 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 466 467 /// Return true if the identifier in its current state was loaded 468 /// from an AST file. 469 bool isFromAST() const { return IsFromAST; } 470 471 void setIsFromAST() { IsFromAST = true; } 472 473 /// Determine whether this identifier has changed since it was loaded 474 /// from an AST file. 475 bool hasChangedSinceDeserialization() const { 476 return ChangedAfterLoad; 477 } 478 479 /// Note that this identifier has changed since it was loaded from 480 /// an AST file. 481 void setChangedSinceDeserialization() { 482 ChangedAfterLoad = true; 483 } 484 485 /// Determine whether the frontend token information for this 486 /// identifier has changed since it was loaded from an AST file. 487 bool hasFETokenInfoChangedSinceDeserialization() const { 488 return FEChangedAfterLoad; 489 } 490 491 /// Note that the frontend token information for this identifier has 492 /// changed since it was loaded from an AST file. 493 void setFETokenInfoChangedSinceDeserialization() { 494 FEChangedAfterLoad = true; 495 } 496 497 /// Determine whether the information for this identifier is out of 498 /// date with respect to the external source. 499 bool isOutOfDate() const { return OutOfDate; } 500 501 /// Set whether the information for this identifier is out of 502 /// date with respect to the external source. 503 void setOutOfDate(bool OOD) { 504 OutOfDate = OOD; 505 if (OOD) 506 NeedsHandleIdentifier = true; 507 else 508 RecomputeNeedsHandleIdentifier(); 509 } 510 511 /// Determine whether this is the contextual keyword \c import. 512 bool isModulesImport() const { return IsModulesImport; } 513 514 /// Set whether this identifier is the contextual keyword \c import. 515 void setModulesImport(bool I) { 516 IsModulesImport = I; 517 if (I) 518 NeedsHandleIdentifier = true; 519 else 520 RecomputeNeedsHandleIdentifier(); 521 } 522 523 /// Determine whether this is the mangled name of an OpenMP variant. 524 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } 525 526 /// Set whether this is the mangled name of an OpenMP variant. 527 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; } 528 529 /// Return true if this identifier is an editor placeholder. 530 /// 531 /// Editor placeholders are produced by the code-completion engine and are 532 /// represented as characters between '<#' and '#>' in the source code. An 533 /// example of auto-completed call with a placeholder parameter is shown 534 /// below: 535 /// \code 536 /// function(<#int x#>); 537 /// \endcode 538 bool isEditorPlaceholder() const { 539 return getName().starts_with("<#") && getName().ends_with("#>"); 540 } 541 542 /// Determine whether \p this is a name reserved for the implementation (C99 543 /// 7.1.3, C++ [lib.global.names]). 544 ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const; 545 546 /// Determine whether \p this is a name reserved for future standardization or 547 /// the implementation (C++ [usrlit.suffix]). 548 ReservedLiteralSuffixIdStatus isReservedLiteralSuffixId() const; 549 550 /// If the identifier is an "uglified" reserved name, return a cleaned form. 551 /// e.g. _Foo => Foo. Otherwise, just returns the name. 552 StringRef deuglifiedName() const; 553 bool isPlaceholder() const { 554 return getLength() == 1 && getNameStart()[0] == '_'; 555 } 556 557 /// Provide less than operator for lexicographical sorting. 558 bool operator<(const IdentifierInfo &RHS) const { 559 return getName() < RHS.getName(); 560 } 561 562 private: 563 /// The Preprocessor::HandleIdentifier does several special (but rare) 564 /// things to identifiers of various sorts. For example, it changes the 565 /// \c for keyword token from tok::identifier to tok::for. 566 /// 567 /// This method is very tied to the definition of HandleIdentifier. Any 568 /// change to it should be reflected here. 569 void RecomputeNeedsHandleIdentifier() { 570 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || 571 isExtensionToken() || isFutureCompatKeyword() || 572 isOutOfDate() || isModulesImport(); 573 } 574 }; 575 576 /// An RAII object for [un]poisoning an identifier within a scope. 577 /// 578 /// \p II is allowed to be null, in which case objects of this type have 579 /// no effect. 580 class PoisonIdentifierRAIIObject { 581 IdentifierInfo *const II; 582 const bool OldValue; 583 584 public: 585 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 586 : II(II), OldValue(II ? II->isPoisoned() : false) { 587 if(II) 588 II->setIsPoisoned(NewValue); 589 } 590 591 ~PoisonIdentifierRAIIObject() { 592 if(II) 593 II->setIsPoisoned(OldValue); 594 } 595 }; 596 597 /// An iterator that walks over all of the known identifiers 598 /// in the lookup table. 599 /// 600 /// Since this iterator uses an abstract interface via virtual 601 /// functions, it uses an object-oriented interface rather than the 602 /// more standard C++ STL iterator interface. In this OO-style 603 /// iteration, the single function \c Next() provides dereference, 604 /// advance, and end-of-sequence checking in a single 605 /// operation. Subclasses of this iterator type will provide the 606 /// actual functionality. 607 class IdentifierIterator { 608 protected: 609 IdentifierIterator() = default; 610 611 public: 612 IdentifierIterator(const IdentifierIterator &) = delete; 613 IdentifierIterator &operator=(const IdentifierIterator &) = delete; 614 615 virtual ~IdentifierIterator(); 616 617 /// Retrieve the next string in the identifier table and 618 /// advances the iterator for the following string. 619 /// 620 /// \returns The next string in the identifier table. If there is 621 /// no such string, returns an empty \c StringRef. 622 virtual StringRef Next() = 0; 623 }; 624 625 /// Provides lookups to, and iteration over, IdentiferInfo objects. 626 class IdentifierInfoLookup { 627 public: 628 virtual ~IdentifierInfoLookup(); 629 630 /// Return the IdentifierInfo for the specified named identifier. 631 /// 632 /// Unlike the version in IdentifierTable, this returns a pointer instead 633 /// of a reference. If the pointer is null then the IdentifierInfo cannot 634 /// be found. 635 virtual IdentifierInfo* get(StringRef Name) = 0; 636 637 /// Retrieve an iterator into the set of all identifiers 638 /// known to this identifier lookup source. 639 /// 640 /// This routine provides access to all of the identifiers known to 641 /// the identifier lookup, allowing access to the contents of the 642 /// identifiers without introducing the overhead of constructing 643 /// IdentifierInfo objects for each. 644 /// 645 /// \returns A new iterator into the set of known identifiers. The 646 /// caller is responsible for deleting this iterator. 647 virtual IdentifierIterator *getIdentifiers(); 648 }; 649 650 /// Implements an efficient mapping from strings to IdentifierInfo nodes. 651 /// 652 /// This has no other purpose, but this is an extremely performance-critical 653 /// piece of the code, as each occurrence of every identifier goes through 654 /// here when lexed. 655 class IdentifierTable { 656 // Shark shows that using MallocAllocator is *much* slower than using this 657 // BumpPtrAllocator! 658 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>; 659 HashTableTy HashTable; 660 661 IdentifierInfoLookup* ExternalLookup; 662 663 public: 664 /// Create the identifier table. 665 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr); 666 667 /// Create the identifier table, populating it with info about the 668 /// language keywords for the language specified by \p LangOpts. 669 explicit IdentifierTable(const LangOptions &LangOpts, 670 IdentifierInfoLookup *ExternalLookup = nullptr); 671 672 /// Set the external identifier lookup mechanism. 673 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 674 ExternalLookup = IILookup; 675 } 676 677 /// Retrieve the external identifier lookup object, if any. 678 IdentifierInfoLookup *getExternalIdentifierLookup() const { 679 return ExternalLookup; 680 } 681 682 llvm::BumpPtrAllocator& getAllocator() { 683 return HashTable.getAllocator(); 684 } 685 686 /// Return the identifier token info for the specified named 687 /// identifier. 688 IdentifierInfo &get(StringRef Name) { 689 auto &Entry = *HashTable.try_emplace(Name, nullptr).first; 690 691 IdentifierInfo *&II = Entry.second; 692 if (II) return *II; 693 694 // No entry; if we have an external lookup, look there first. 695 if (ExternalLookup) { 696 II = ExternalLookup->get(Name); 697 if (II) 698 return *II; 699 } 700 701 // Lookups failed, make a new IdentifierInfo. 702 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 703 II = new (Mem) IdentifierInfo(); 704 705 // Make sure getName() knows how to find the IdentifierInfo 706 // contents. 707 II->Entry = &Entry; 708 709 return *II; 710 } 711 712 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 713 IdentifierInfo &II = get(Name); 714 II.TokenID = TokenCode; 715 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 716 return II; 717 } 718 719 /// Gets an IdentifierInfo for the given name without consulting 720 /// external sources. 721 /// 722 /// This is a version of get() meant for external sources that want to 723 /// introduce or modify an identifier. If they called get(), they would 724 /// likely end up in a recursion. 725 IdentifierInfo &getOwn(StringRef Name) { 726 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 727 728 IdentifierInfo *&II = Entry.second; 729 if (II) 730 return *II; 731 732 // Lookups failed, make a new IdentifierInfo. 733 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 734 II = new (Mem) IdentifierInfo(); 735 736 // Make sure getName() knows how to find the IdentifierInfo 737 // contents. 738 II->Entry = &Entry; 739 740 // If this is the 'import' contextual keyword, mark it as such. 741 if (Name == "import") 742 II->setModulesImport(true); 743 744 return *II; 745 } 746 747 using iterator = HashTableTy::const_iterator; 748 using const_iterator = HashTableTy::const_iterator; 749 750 iterator begin() const { return HashTable.begin(); } 751 iterator end() const { return HashTable.end(); } 752 unsigned size() const { return HashTable.size(); } 753 754 iterator find(StringRef Name) const { return HashTable.find(Name); } 755 756 /// Print some statistics to stderr that indicate how well the 757 /// hashing is doing. 758 void PrintStats() const; 759 760 /// Populate the identifier table with info about the language keywords 761 /// for the language specified by \p LangOpts. 762 void AddKeywords(const LangOptions &LangOpts); 763 764 /// Returns the correct diagnostic to issue for a future-compat diagnostic 765 /// warning. Note, this function assumes the identifier passed has already 766 /// been determined to be a future compatible keyword. 767 diag::kind getFutureCompatDiagKind(const IdentifierInfo &II, 768 const LangOptions &LangOpts); 769 }; 770 771 /// A family of Objective-C methods. 772 /// 773 /// These families have no inherent meaning in the language, but are 774 /// nonetheless central enough in the existing implementations to 775 /// merit direct AST support. While, in theory, arbitrary methods can 776 /// be considered to form families, we focus here on the methods 777 /// involving allocation and retain-count management, as these are the 778 /// most "core" and the most likely to be useful to diverse clients 779 /// without extra information. 780 /// 781 /// Both selectors and actual method declarations may be classified 782 /// into families. Method families may impose additional restrictions 783 /// beyond their selector name; for example, a method called '_init' 784 /// that returns void is not considered to be in the 'init' family 785 /// (but would be if it returned 'id'). It is also possible to 786 /// explicitly change or remove a method's family. Therefore the 787 /// method's family should be considered the single source of truth. 788 enum ObjCMethodFamily { 789 /// No particular method family. 790 OMF_None, 791 792 // Selectors in these families may have arbitrary arity, may be 793 // written with arbitrary leading underscores, and may have 794 // additional CamelCase "words" in their first selector chunk 795 // following the family name. 796 OMF_alloc, 797 OMF_copy, 798 OMF_init, 799 OMF_mutableCopy, 800 OMF_new, 801 802 // These families are singletons consisting only of the nullary 803 // selector with the given name. 804 OMF_autorelease, 805 OMF_dealloc, 806 OMF_finalize, 807 OMF_release, 808 OMF_retain, 809 OMF_retainCount, 810 OMF_self, 811 OMF_initialize, 812 813 // performSelector families 814 OMF_performSelector 815 }; 816 817 /// Enough bits to store any enumerator in ObjCMethodFamily or 818 /// InvalidObjCMethodFamily. 819 enum { ObjCMethodFamilyBitWidth = 4 }; 820 821 /// An invalid value of ObjCMethodFamily. 822 enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 }; 823 824 /// A family of Objective-C methods. 825 /// 826 /// These are family of methods whose result type is initially 'id', but 827 /// but are candidate for the result type to be changed to 'instancetype'. 828 enum ObjCInstanceTypeFamily { 829 OIT_None, 830 OIT_Array, 831 OIT_Dictionary, 832 OIT_Singleton, 833 OIT_Init, 834 OIT_ReturnsSelf 835 }; 836 837 enum ObjCStringFormatFamily { 838 SFF_None, 839 SFF_NSString, 840 SFF_CFString 841 }; 842 843 namespace detail { 844 845 /// DeclarationNameExtra is used as a base of various uncommon special names. 846 /// This class is needed since DeclarationName has not enough space to store 847 /// the kind of every possible names. Therefore the kind of common names is 848 /// stored directly in DeclarationName, and the kind of uncommon names is 849 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because 850 /// DeclarationName needs the lower 3 bits to store the kind of common names. 851 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change 852 /// here is very likely to require changes in DeclarationName(Table). 853 class alignas(IdentifierInfoAlignment) DeclarationNameExtra { 854 friend class clang::DeclarationName; 855 friend class clang::DeclarationNameTable; 856 857 protected: 858 /// The kind of "extra" information stored in the DeclarationName. See 859 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values 860 /// are used. Note that DeclarationName depends on the numerical values 861 /// of the enumerators in this enum. See DeclarationName::StoredNameKind 862 /// for more info. 863 enum ExtraKind { 864 CXXDeductionGuideName, 865 CXXLiteralOperatorName, 866 CXXUsingDirective, 867 ObjCMultiArgSelector 868 }; 869 870 /// ExtraKindOrNumArgs has one of the following meaning: 871 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra 872 /// is in this case in fact either a CXXDeductionGuideNameExtra or 873 /// a CXXLiteralOperatorIdName. 874 /// 875 /// * It may be also name common to C++ using-directives (CXXUsingDirective), 876 /// 877 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is 878 /// the number of arguments in the Objective-C selector, in which 879 /// case the DeclarationNameExtra is also a MultiKeywordSelector. 880 unsigned ExtraKindOrNumArgs; 881 882 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {} 883 DeclarationNameExtra(unsigned NumArgs) 884 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {} 885 886 /// Return the corresponding ExtraKind. 887 ExtraKind getKind() const { 888 return static_cast<ExtraKind>(ExtraKindOrNumArgs > 889 (unsigned)ObjCMultiArgSelector 890 ? (unsigned)ObjCMultiArgSelector 891 : ExtraKindOrNumArgs); 892 } 893 894 /// Return the number of arguments in an ObjC selector. Only valid when this 895 /// is indeed an ObjCMultiArgSelector. 896 unsigned getNumArgs() const { 897 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector && 898 "getNumArgs called but this is not an ObjC selector!"); 899 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector; 900 } 901 }; 902 903 } // namespace detail 904 905 /// One of these variable length records is kept for each 906 /// selector containing more than one keyword. We use a folding set 907 /// to unique aggregate names (keyword selectors in ObjC parlance). Access to 908 /// this class is provided strictly through Selector. 909 class alignas(IdentifierInfoAlignment) MultiKeywordSelector 910 : public detail::DeclarationNameExtra, 911 public llvm::FoldingSetNode { 912 MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {} 913 914 public: 915 // Constructor for keyword selectors. 916 MultiKeywordSelector(unsigned nKeys, const IdentifierInfo **IIV) 917 : DeclarationNameExtra(nKeys) { 918 assert((nKeys > 1) && "not a multi-keyword selector"); 919 920 // Fill in the trailing keyword array. 921 const IdentifierInfo **KeyInfo = 922 reinterpret_cast<const IdentifierInfo **>(this + 1); 923 for (unsigned i = 0; i != nKeys; ++i) 924 KeyInfo[i] = IIV[i]; 925 } 926 927 // getName - Derive the full selector name and return it. 928 std::string getName() const; 929 930 using DeclarationNameExtra::getNumArgs; 931 932 using keyword_iterator = const IdentifierInfo *const *; 933 934 keyword_iterator keyword_begin() const { 935 return reinterpret_cast<keyword_iterator>(this + 1); 936 } 937 938 keyword_iterator keyword_end() const { 939 return keyword_begin() + getNumArgs(); 940 } 941 942 const IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const { 943 assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index"); 944 return keyword_begin()[i]; 945 } 946 947 static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys, 948 unsigned NumArgs) { 949 ID.AddInteger(NumArgs); 950 for (unsigned i = 0; i != NumArgs; ++i) 951 ID.AddPointer(ArgTys[i]); 952 } 953 954 void Profile(llvm::FoldingSetNodeID &ID) { 955 Profile(ID, keyword_begin(), getNumArgs()); 956 } 957 }; 958 959 /// Smart pointer class that efficiently represents Objective-C method 960 /// names. 961 /// 962 /// This class will either point to an IdentifierInfo or a 963 /// MultiKeywordSelector (which is private). This enables us to optimize 964 /// selectors that take no arguments and selectors that take 1 argument, which 965 /// accounts for 78% of all selectors in Cocoa.h. 966 class Selector { 967 friend class Diagnostic; 968 friend class SelectorTable; // only the SelectorTable can create these 969 friend class DeclarationName; // and the AST's DeclarationName. 970 971 enum IdentifierInfoFlag { 972 // Empty selector = 0. Note that these enumeration values must 973 // correspond to the enumeration values of DeclarationName::StoredNameKind 974 ZeroArg = 0x01, 975 OneArg = 0x02, 976 // IMPORTANT NOTE: see comments in InfoPtr (below) about this enumerator 977 // value. 978 MultiArg = 0x07, 979 }; 980 981 /// IMPORTANT NOTE: the order of the types in this PointerUnion are 982 /// important! The DeclarationName class has bidirectional conversion 983 /// to/from Selector through an opaque pointer (void *) which corresponds 984 /// to this PointerIntPair. The discriminator bit from the PointerUnion 985 /// corresponds to the high bit in the MultiArg enumerator. So while this 986 /// PointerIntPair only has two bits for the integer (and we mask off the 987 /// high bit in `MultiArg` when it is used), that discrimator bit is 988 /// still necessary for the opaque conversion. The discriminator bit 989 /// from the PointerUnion and the two integer bits from the 990 /// PointerIntPair are also exposed via the DeclarationName::StoredNameKind 991 /// enumeration; see the comments in DeclarationName.h for more details. 992 /// Do not reorder or add any arguments to this template 993 /// without thoroughly understanding how tightly coupled these classes are. 994 llvm::PointerIntPair< 995 llvm::PointerUnion<const IdentifierInfo *, MultiKeywordSelector *>, 2> 996 InfoPtr; 997 998 Selector(const IdentifierInfo *II, unsigned nArgs) { 999 assert(nArgs < 2 && "nArgs not equal to 0/1"); 1000 InfoPtr.setPointerAndInt(II, nArgs + 1); 1001 } 1002 1003 Selector(MultiKeywordSelector *SI) { 1004 // IMPORTANT NOTE: we mask off the upper bit of this value because we only 1005 // reserve two bits for the integer in the PointerIntPair. See the comments 1006 // in `InfoPtr` for more details. 1007 InfoPtr.setPointerAndInt(SI, MultiArg & 0b11); 1008 } 1009 1010 const IdentifierInfo *getAsIdentifierInfo() const { 1011 return dyn_cast_if_present<const IdentifierInfo *>(InfoPtr.getPointer()); 1012 } 1013 1014 MultiKeywordSelector *getMultiKeywordSelector() const { 1015 return cast<MultiKeywordSelector *>(InfoPtr.getPointer()); 1016 } 1017 1018 unsigned getIdentifierInfoFlag() const { 1019 unsigned new_flags = InfoPtr.getInt(); 1020 // IMPORTANT NOTE: We have to reconstitute this data rather than use the 1021 // value directly from the PointerIntPair. See the comments in `InfoPtr` 1022 // for more details. 1023 if (isa<MultiKeywordSelector *>(InfoPtr.getPointer())) 1024 new_flags |= MultiArg; 1025 return new_flags; 1026 } 1027 1028 static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 1029 1030 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel); 1031 1032 public: 1033 /// The default ctor should only be used when creating data structures that 1034 /// will contain selectors. 1035 Selector() = default; 1036 explicit Selector(uintptr_t V) { 1037 InfoPtr.setFromOpaqueValue(reinterpret_cast<void *>(V)); 1038 } 1039 1040 /// operator==/!= - Indicate whether the specified selectors are identical. 1041 bool operator==(Selector RHS) const { 1042 return InfoPtr.getOpaqueValue() == RHS.InfoPtr.getOpaqueValue(); 1043 } 1044 bool operator!=(Selector RHS) const { 1045 return InfoPtr.getOpaqueValue() != RHS.InfoPtr.getOpaqueValue(); 1046 } 1047 1048 void *getAsOpaquePtr() const { return InfoPtr.getOpaqueValue(); } 1049 1050 /// Determine whether this is the empty selector. 1051 bool isNull() const { return InfoPtr.getOpaqueValue() == nullptr; } 1052 1053 // Predicates to identify the selector type. 1054 bool isKeywordSelector() const { return InfoPtr.getInt() != ZeroArg; } 1055 1056 bool isUnarySelector() const { return InfoPtr.getInt() == ZeroArg; } 1057 1058 /// If this selector is the specific keyword selector described by Names. 1059 bool isKeywordSelector(ArrayRef<StringRef> Names) const; 1060 1061 /// If this selector is the specific unary selector described by Name. 1062 bool isUnarySelector(StringRef Name) const; 1063 1064 unsigned getNumArgs() const; 1065 1066 /// Retrieve the identifier at a given position in the selector. 1067 /// 1068 /// Note that the identifier pointer returned may be NULL. Clients that only 1069 /// care about the text of the identifier string, and not the specific, 1070 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 1071 /// an empty string when the identifier pointer would be NULL. 1072 /// 1073 /// \param argIndex The index for which we want to retrieve the identifier. 1074 /// This index shall be less than \c getNumArgs() unless this is a keyword 1075 /// selector, in which case 0 is the only permissible value. 1076 /// 1077 /// \returns the uniqued identifier for this slot, or NULL if this slot has 1078 /// no corresponding identifier. 1079 const IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 1080 1081 /// Retrieve the name at a given position in the selector. 1082 /// 1083 /// \param argIndex The index for which we want to retrieve the name. 1084 /// This index shall be less than \c getNumArgs() unless this is a keyword 1085 /// selector, in which case 0 is the only permissible value. 1086 /// 1087 /// \returns the name for this slot, which may be the empty string if no 1088 /// name was supplied. 1089 StringRef getNameForSlot(unsigned argIndex) const; 1090 1091 /// Derive the full selector name (e.g. "foo:bar:") and return 1092 /// it as an std::string. 1093 std::string getAsString() const; 1094 1095 /// Prints the full selector name (e.g. "foo:bar:"). 1096 void print(llvm::raw_ostream &OS) const; 1097 1098 void dump() const; 1099 1100 /// Derive the conventional family of this method. 1101 ObjCMethodFamily getMethodFamily() const { 1102 return getMethodFamilyImpl(*this); 1103 } 1104 1105 ObjCStringFormatFamily getStringFormatFamily() const { 1106 return getStringFormatFamilyImpl(*this); 1107 } 1108 1109 static Selector getEmptyMarker() { 1110 return Selector(uintptr_t(-1)); 1111 } 1112 1113 static Selector getTombstoneMarker() { 1114 return Selector(uintptr_t(-2)); 1115 } 1116 1117 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); 1118 }; 1119 1120 /// This table allows us to fully hide how we implement 1121 /// multi-keyword caching. 1122 class SelectorTable { 1123 // Actually a SelectorTableImpl 1124 void *Impl; 1125 1126 public: 1127 SelectorTable(); 1128 SelectorTable(const SelectorTable &) = delete; 1129 SelectorTable &operator=(const SelectorTable &) = delete; 1130 ~SelectorTable(); 1131 1132 /// Can create any sort of selector. 1133 /// 1134 /// \p NumArgs indicates whether this is a no argument selector "foo", a 1135 /// single argument selector "foo:" or multi-argument "foo:bar:". 1136 Selector getSelector(unsigned NumArgs, const IdentifierInfo **IIV); 1137 1138 Selector getUnarySelector(const IdentifierInfo *ID) { 1139 return Selector(ID, 1); 1140 } 1141 1142 Selector getNullarySelector(const IdentifierInfo *ID) { 1143 return Selector(ID, 0); 1144 } 1145 1146 /// Return the total amount of memory allocated for managing selectors. 1147 size_t getTotalMemory() const; 1148 1149 /// Return the default setter name for the given identifier. 1150 /// 1151 /// This is "set" + \p Name where the initial character of \p Name 1152 /// has been capitalized. 1153 static SmallString<64> constructSetterName(StringRef Name); 1154 1155 /// Return the default setter selector for the given identifier. 1156 /// 1157 /// This is "set" + \p Name where the initial character of \p Name 1158 /// has been capitalized. 1159 static Selector constructSetterSelector(IdentifierTable &Idents, 1160 SelectorTable &SelTable, 1161 const IdentifierInfo *Name); 1162 1163 /// Return the property name for the given setter selector. 1164 static std::string getPropertyNameFromSetterSelector(Selector Sel); 1165 }; 1166 1167 } // namespace clang 1168 1169 namespace llvm { 1170 1171 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 1172 /// DenseSets. 1173 template <> 1174 struct DenseMapInfo<clang::Selector> { 1175 static clang::Selector getEmptyKey() { 1176 return clang::Selector::getEmptyMarker(); 1177 } 1178 1179 static clang::Selector getTombstoneKey() { 1180 return clang::Selector::getTombstoneMarker(); 1181 } 1182 1183 static unsigned getHashValue(clang::Selector S); 1184 1185 static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 1186 return LHS == RHS; 1187 } 1188 }; 1189 1190 template<> 1191 struct PointerLikeTypeTraits<clang::Selector> { 1192 static const void *getAsVoidPointer(clang::Selector P) { 1193 return P.getAsOpaquePtr(); 1194 } 1195 1196 static clang::Selector getFromVoidPointer(const void *P) { 1197 return clang::Selector(reinterpret_cast<uintptr_t>(P)); 1198 } 1199 1200 static constexpr int NumLowBitsAvailable = 0; 1201 }; 1202 1203 } // namespace llvm 1204 1205 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 1206