xref: /llvm-project/clang/lib/Basic/IdentifierTable.cpp (revision e77a01d79a48e15c94c89e4aa4bd27424a96b49b)
1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdentifierInfo, IdentifierVisitor, and
10 // IdentifierTable interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/IdentifierTable.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/DiagnosticLex.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/OperatorKinds.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/DenseMapInfo.h"
23 #include "llvm/ADT/FoldingSet.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 #include <cstdio>
31 #include <cstring>
32 #include <string>
33 
34 using namespace clang;
35 
// Compile-time check that an identifier-ID field InterestingIdentifierBits
// wide can hold twice LargestBuiltinID, i.e. the largest possible
// target/aux-target builtin combination. 2 << (N - 1) is 2^N, the number of
// values representable in N bits. If this fires, we likely just need to
// increase InterestingIdentifierBits (presumably declared in
// IdentifierTable.h -- confirm).
static_assert(2 * LargestBuiltinID < (2 << (InterestingIdentifierBits - 1)),
              "Insufficient ObjCOrBuiltinID Bits");
41 
42 //===----------------------------------------------------------------------===//
43 // IdentifierTable Implementation
44 //===----------------------------------------------------------------------===//
45 
// Out-of-line definition of the defaulted IdentifierIterator destructor.
IdentifierIterator::~IdentifierIterator() = default;
47 
// Out-of-line definition of the defaulted IdentifierInfoLookup destructor.
IdentifierInfoLookup::~IdentifierInfoLookup() = default;
49 
50 namespace {
51 
52 /// A simple identifier lookup iterator that represents an
53 /// empty sequence of identifiers.
54 class EmptyLookupIterator : public IdentifierIterator {
55 public:
56   StringRef Next() override { return StringRef(); }
57 };
58 
59 } // namespace
60 
/// Default implementation: returns a newly heap-allocated iterator over an
/// empty identifier sequence.
/// NOTE(review): the iterator is created with 'new' and returned as a raw
/// pointer, so the caller presumably takes ownership -- confirm against the
/// declaration in IdentifierTable.h.
IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
  return new EmptyLookupIterator();
}
64 
/// Creates an identifier table that contains no keywords.
///
/// \param ExternalLookup stored in the ExternalLookup member; this
/// constructor does not dereference it.
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Start with space for 8K identifiers.
      ExternalLookup(ExternalLookup) {}
68 
/// Creates an identifier table and registers the keywords of the language
/// described by \p LangOpts.
///
/// Delegates to the single-argument constructor for the basic setup and then
/// calls AddKeywords().
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  // Populate the identifier table with info about keywords for the current
  // language.
  AddKeywords(LangOpts);
}
76 
77 //===----------------------------------------------------------------------===//
78 // Language Keyword Implementation
79 //===----------------------------------------------------------------------===//
80 
81 // Constants for TokenKinds.def
namespace {

  /// Bit flags used as the FLAGS argument of the keyword entries expanded
  /// from TokenKinds.def. Every enumerator up to and including KEYFIXEDPOINT
  /// is a distinct single bit; KEYMAX, KEYALLCXX and KEYALL are derived from
  /// those bits.
  enum TokenKey : unsigned {
    KEYC99        = 0x1,
    KEYCXX        = 0x2,
    KEYCXX11      = 0x4,
    KEYGNU        = 0x8,
    KEYMS         = 0x10,
    BOOLSUPPORT   = 0x20,
    KEYALTIVEC    = 0x40,
    KEYNOCXX      = 0x80,
    KEYBORLAND    = 0x100,
    KEYOPENCLC    = 0x200,
    KEYC23        = 0x400,
    KEYNOMS18     = 0x800,
    KEYNOOPENCL   = 0x1000,
    WCHARSUPPORT  = 0x2000,
    HALFSUPPORT   = 0x4000,
    CHAR8SUPPORT  = 0x8000,
    KEYOBJC       = 0x10000,
    KEYZVECTOR    = 0x20000,
    KEYCOROUTINES = 0x40000,
    KEYMODULES    = 0x80000,
    KEYCXX20      = 0x100000,
    KEYOPENCLCXX  = 0x200000,
    KEYMSCOMPAT   = 0x400000,
    KEYSYCL       = 0x800000,
    KEYCUDA       = 0x1000000,
    KEYHLSL       = 0x2000000,
    KEYFIXEDPOINT = 0x4000000,
    KEYMAX        = KEYFIXEDPOINT, // The maximum key
    KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,
    // Every bit up to and including KEYMAX, minus the two excluding flags.
    KEYALL = (KEYMAX | (KEYMAX-1)) & ~KEYNOMS18 &
             ~KEYNOOPENCL // KEYNOMS18 and KEYNOOPENCL are used to exclude.
  };

  /// How a keyword is treated in the selected standard. This enum is ordered
  /// intentionally so that the value that 'wins' is the most 'permissive':
  /// getKeywordStatus() merges per-flag results by keeping the largest value.
  enum KeywordStatus {
    KS_Unknown,     // Not yet calculated. Used when figuring out the status.
    KS_Disabled,    // Disabled
    KS_Future,      // Is a keyword in future standard
    KS_Extension,   // Is an extension
    KS_Enabled,     // Enabled
  };

} // namespace
129 
130 // This works on a single TokenKey flag and checks the LangOpts to get the
131 // KeywordStatus based exclusively on this flag, so that it can be merged in
132 // getKeywordStatus. Most should be enabled/disabled, but some might imply
133 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
134 // be disabled, and the calling function makes it 'disabled' if no other flag
135 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
136 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
137                                             TokenKey Flag) {
138   // Flag is a single bit version of TokenKey (that is, not
139   // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
140   assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
141 
142   switch (Flag) {
143   case KEYC99:
144     if (LangOpts.C99)
145       return KS_Enabled;
146     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
147   case KEYC23:
148     if (LangOpts.C23)
149       return KS_Enabled;
150     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
151   case KEYCXX:
152     return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
153   case KEYCXX11:
154     if (LangOpts.CPlusPlus11)
155       return KS_Enabled;
156     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
157   case KEYCXX20:
158     if (LangOpts.CPlusPlus20)
159       return KS_Enabled;
160     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
161   case KEYGNU:
162     return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
163   case KEYMS:
164     return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
165   case BOOLSUPPORT:
166     if (LangOpts.Bool)      return KS_Enabled;
167     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
168   case KEYALTIVEC:
169     return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
170   case KEYBORLAND:
171     return LangOpts.Borland ? KS_Extension : KS_Unknown;
172   case KEYOPENCLC:
173     return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
174                                                         : KS_Unknown;
175   case WCHARSUPPORT:
176     return LangOpts.WChar ? KS_Enabled : KS_Unknown;
177   case HALFSUPPORT:
178     return LangOpts.Half ? KS_Enabled : KS_Unknown;
179   case CHAR8SUPPORT:
180     if (LangOpts.Char8) return KS_Enabled;
181     if (LangOpts.CPlusPlus20) return KS_Unknown;
182     if (LangOpts.CPlusPlus) return KS_Future;
183     return KS_Unknown;
184   case KEYOBJC:
185     // We treat bridge casts as objective-C keywords so we can warn on them
186     // in non-arc mode.
187     return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
188   case KEYZVECTOR:
189     return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
190   case KEYCOROUTINES:
191     return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
192   case KEYMODULES:
193     return KS_Unknown;
194   case KEYOPENCLCXX:
195     return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
196   case KEYMSCOMPAT:
197     return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
198   case KEYSYCL:
199     return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
200   case KEYCUDA:
201     return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
202   case KEYHLSL:
203     return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
204   case KEYNOCXX:
205     // This is enabled in all non-C++ modes, but might be enabled for other
206     // reasons as well.
207     return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
208   case KEYNOOPENCL:
209     // The disable behavior for this is handled in getKeywordStatus.
210     return KS_Unknown;
211   case KEYNOMS18:
212     // The disable behavior for this is handled in getKeywordStatus.
213     return KS_Unknown;
214   case KEYFIXEDPOINT:
215     return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
216   default:
217     llvm_unreachable("Unknown KeywordStatus flag");
218   }
219 }
220 
221 /// Translates flags as specified in TokenKinds.def into keyword status
222 /// in the given language standard.
223 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
224                                       unsigned Flags) {
225   // KEYALL means always enabled, so special case this one.
226   if (Flags == KEYALL) return KS_Enabled;
227   // These are tests that need to 'always win', as they are special in that they
228   // disable based on certain conditions.
229   if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
230   if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
231       !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
232     return KS_Disabled;
233 
234   KeywordStatus CurStatus = KS_Unknown;
235 
236   while (Flags != 0) {
237     unsigned CurFlag = Flags & ~(Flags - 1);
238     Flags = Flags & ~CurFlag;
239     CurStatus = std::max(
240         CurStatus,
241         getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
242   }
243 
244   if (CurStatus == KS_Unknown)
245     return KS_Disabled;
246   return CurStatus;
247 }
248 
249 /// AddKeyword - This method is used to associate a token ID with specific
250 /// identifiers because they are language keywords.  This causes the lexer to
251 /// automatically map matching identifiers to specialized token codes.
252 static void AddKeyword(StringRef Keyword,
253                        tok::TokenKind TokenCode, unsigned Flags,
254                        const LangOptions &LangOpts, IdentifierTable &Table) {
255   KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
256 
257   // Don't add this keyword if disabled in this language.
258   if (AddResult == KS_Disabled) return;
259 
260   IdentifierInfo &Info =
261       Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
262   Info.setIsExtensionToken(AddResult == KS_Extension);
263   Info.setIsFutureCompatKeyword(AddResult == KS_Future);
264 }
265 
266 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
267 /// representations.
268 static void AddCXXOperatorKeyword(StringRef Keyword,
269                                   tok::TokenKind TokenCode,
270                                   IdentifierTable &Table) {
271   IdentifierInfo &Info = Table.get(Keyword, TokenCode);
272   Info.setIsCPlusPlusOperatorKeyword();
273 }
274 
275 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
276 /// or "property".
277 static void AddObjCKeyword(StringRef Name,
278                            tok::ObjCKeywordKind ObjCID,
279                            IdentifierTable &Table) {
280   Table.get(Name).setObjCKeywordID(ObjCID);
281 }
282 
283 static void AddNotableIdentifier(StringRef Name,
284                                  tok::NotableIdentifierKind BTID,
285                                  IdentifierTable &Table) {
286   // Don't add 'not_notable' identifier.
287   if (BTID != tok::not_notable) {
288     IdentifierInfo &Info = Table.get(Name, tok::identifier);
289     Info.setNotableIdentifierID(BTID);
290   }
291 }
292 
/// AddKeywords - Add all keywords to the symbol table.
///
/// The bulk of the work is done by including TokenKinds.def with the macros
/// below defined, so that each entry in that file expands to one registration
/// call against this table. A few option-controlled keywords and the
/// contextual 'import'/'module' keywords are then added by hand.
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
  // Add keywords and tokens for the current language.
  // KEYWORD: the spelling is the stringized NAME, the token is tok::kw_NAME.
#define KEYWORD(NAME, FLAGS) \
  AddKeyword(StringRef(#NAME), tok::kw_ ## NAME,  \
             FLAGS, LangOpts, *this);
  // ALIAS: NAME is used as the spelling as-is and maps onto the existing
  // token tok::kw_TOK.
#define ALIAS(NAME, TOK, FLAGS) \
  AddKeyword(StringRef(NAME), tok::kw_ ## TOK,  \
             FLAGS, LangOpts, *this);
  // Operator keywords are only registered when LangOpts.CXXOperatorNames is
  // set.
#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
  if (LangOpts.CXXOperatorNames)          \
    AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);
  // Objective-C @keywords are only registered when LangOpts.ObjC is set.
#define OBJC_AT_KEYWORD(NAME)  \
  if (LangOpts.ObjC)           \
    AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
#define NOTABLE_IDENTIFIER(NAME)                                               \
  AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this);

  // TESTING_KEYWORD expands to nothing, so those entries register nothing.
#define TESTING_KEYWORD(NAME, FLAGS)
  // NOTE(review): none of the macros above are #undef'd in this function;
  // presumably TokenKinds.def undefines them at its end -- confirm.
#include "clang/Basic/TokenKinds.def"

  // The keywords below are controlled by a dedicated language option rather
  // than by TokenKey flags, hence KEYALL once the option is set.
  if (LangOpts.ParseUnknownAnytype)
    AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
               LangOpts, *this);

  if (LangOpts.DeclSpecKeyword)
    AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);

  // '__ieee128' is registered with the same token kind as '__float128'.
  if (LangOpts.IEEE128)
    AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);

  // Add the 'import' and 'module' contextual keywords.
  get("import").setModulesImport(true);
  get("module").setModulesDeclaration(true);
}
329 
/// Checks if the specified token kind represents a keyword in the
/// specified language.
///
/// The switch is generated from the KEYWORD entries of TokenKinds.def: each
/// tok::kw_NAME case evaluates that entry's FLAGS with getKeywordStatus().
/// Token kinds without a KEYWORD entry fall into the default case.
/// \returns Status of the keyword in the language.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
                                      tok::TokenKind K) {
  switch (K) {
#define KEYWORD(NAME, FLAGS) \
  case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
  default: return KS_Disabled;
  }
}
342 
343 /// Returns true if the identifier represents a keyword in the
344 /// specified language.
345 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
346   switch (getTokenKwStatus(LangOpts, getTokenID())) {
347   case KS_Enabled:
348   case KS_Extension:
349     return true;
350   default:
351     return false;
352   }
353 }
354 
355 /// Returns true if the identifier represents a C++ keyword in the
356 /// specified language.
357 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
358   if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
359     return false;
360   // This is a C++ keyword if this identifier is not a keyword when checked
361   // using LangOptions without C++ support.
362   LangOptions LangOptsNoCPP = LangOpts;
363   LangOptsNoCPP.CPlusPlus = false;
364   LangOptsNoCPP.CPlusPlus11 = false;
365   LangOptsNoCPP.CPlusPlus20 = false;
366   return !isKeyword(LangOptsNoCPP);
367 }
368 
369 ReservedIdentifierStatus
370 IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
371   StringRef Name = getName();
372 
373   // '_' is a reserved identifier, but its use is so common (e.g. to store
374   // ignored values) that we don't warn on it.
375   if (Name.size() <= 1)
376     return ReservedIdentifierStatus::NotReserved;
377 
378   // [lex.name] p3
379   if (Name[0] == '_') {
380 
381     // Each name that begins with an underscore followed by an uppercase letter
382     // or another underscore is reserved.
383     if (Name[1] == '_')
384       return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
385 
386     if ('A' <= Name[1] && Name[1] <= 'Z')
387       return ReservedIdentifierStatus::
388           StartsWithUnderscoreFollowedByCapitalLetter;
389 
390     // This is a bit misleading: it actually means it's only reserved if we're
391     // at global scope because it starts with an underscore.
392     return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
393   }
394 
395   // Each name that contains a double underscore (__) is reserved.
396   if (LangOpts.CPlusPlus && Name.contains("__"))
397     return ReservedIdentifierStatus::ContainsDoubleUnderscore;
398 
399   return ReservedIdentifierStatus::NotReserved;
400 }
401 
402 ReservedLiteralSuffixIdStatus
403 IdentifierInfo::isReservedLiteralSuffixId() const {
404   StringRef Name = getName();
405 
406   if (Name[0] != '_')
407     return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
408 
409   if (Name.contains("__"))
410     return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
411 
412   return ReservedLiteralSuffixIdStatus::NotReserved;
413 }
414 
415 StringRef IdentifierInfo::deuglifiedName() const {
416   StringRef Name = getName();
417   if (Name.size() >= 2 && Name.front() == '_' &&
418       (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
419     return Name.ltrim('_');
420   return Name;
421 }
422 
/// Returns the preprocessor keyword kind that this identifier spells, or
/// tok::pp_not_keyword if it does not spell one.
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
  // We use a perfect hash function here involving the length of the keyword,
  // the first and third character.  For preprocessor ID's there are no
  // collisions (if there were, the switch below would complain about duplicate
  // case values).  Note that this depends on 'if' being null terminated: for
  // a two-character name the "third character" read below is Name[2], one
  // past the last character.

  // HASH: the length goes into the bits above bit 5, the difference of the
  // first and third character (mod 64) into the low six bits.
#define HASH(LEN, FIRST, THIRD)                                                \
  (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
  // CASE: on a hash hit, confirm with a full comparison of the first LEN
  // bytes before reporting the keyword.
#define CASE(LEN, FIRST, THIRD, NAME) \
  case HASH(LEN, FIRST, THIRD): \
    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME

  unsigned Len = getLength();
  // No keyword in the table below is shorter than two characters.
  if (Len < 2) return tok::pp_not_keyword;
  const char *Name = getNameStart();
  switch (HASH(Len, Name[0], Name[2])) {
  default: return tok::pp_not_keyword;
  CASE( 2, 'i', '\0', if);
  CASE( 4, 'e', 'i', elif);
  CASE( 4, 'e', 's', else);
  CASE( 4, 'l', 'n', line);
  CASE( 4, 's', 'c', sccs);
  CASE( 5, 'e', 'b', embed);
  CASE( 5, 'e', 'd', endif);
  CASE( 5, 'e', 'r', error);
  CASE( 5, 'i', 'e', ident);
  CASE( 5, 'i', 'd', ifdef);
  CASE( 5, 'u', 'd', undef);

  CASE( 6, 'a', 's', assert);
  CASE( 6, 'd', 'f', define);
  CASE( 6, 'i', 'n', ifndef);
  CASE( 6, 'i', 'p', import);
  CASE( 6, 'p', 'a', pragma);

  CASE( 7, 'd', 'f', defined);
  CASE( 7, 'e', 'i', elifdef);
  CASE( 7, 'i', 'c', include);
  CASE( 7, 'w', 'r', warning);

  CASE( 8, 'e', 'i', elifndef);
  CASE( 8, 'u', 'a', unassert);
  CASE(12, 'i', 'c', include_next);

  CASE(14, '_', 'p', __public_macro);

  CASE(15, '_', 'p', __private_macro);

  CASE(16, '_', 'i', __include_macros);
#undef CASE
#undef HASH
  }
}
476 
477 //===----------------------------------------------------------------------===//
478 // Stats Implementation
479 //===----------------------------------------------------------------------===//
480 
481 /// PrintStats - Print statistics about how well the identifier table is doing
482 /// at hashing identifiers.
483 void IdentifierTable::PrintStats() const {
484   unsigned NumBuckets = HashTable.getNumBuckets();
485   unsigned NumIdentifiers = HashTable.getNumItems();
486   unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
487   unsigned AverageIdentifierSize = 0;
488   unsigned MaxIdentifierLength = 0;
489 
490   // TODO: Figure out maximum times an identifier had to probe for -stats.
491   for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
492        I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
493     unsigned IdLen = I->getKeyLength();
494     AverageIdentifierSize += IdLen;
495     if (MaxIdentifierLength < IdLen)
496       MaxIdentifierLength = IdLen;
497   }
498 
499   fprintf(stderr, "\n*** Identifier Table Stats:\n");
500   fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
501   fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
502   fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
503           NumIdentifiers/(double)NumBuckets);
504   fprintf(stderr, "Ave identifier length: %f\n",
505           (AverageIdentifierSize/(double)NumIdentifiers));
506   fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
507 
508   // Compute statistics about the memory allocated for identifiers.
509   HashTable.getAllocator().PrintStats();
510 }
511 
512 //===----------------------------------------------------------------------===//
513 // SelectorTable Implementation
514 //===----------------------------------------------------------------------===//
515 
/// Hashes a Selector by hashing its opaque pointer representation with the
/// DenseMapInfo specialization for void*.
unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
  return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
}
519 
520 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
521   assert(!Names.empty() && "must have >= 1 selector slots");
522   if (getNumArgs() != Names.size())
523     return false;
524   for (unsigned I = 0, E = Names.size(); I != E; ++I) {
525     if (getNameForSlot(I) != Names[I])
526       return false;
527   }
528   return true;
529 }
530 
531 bool Selector::isUnarySelector(StringRef Name) const {
532   return isUnarySelector() && getNameForSlot(0) == Name;
533 }
534 
535 unsigned Selector::getNumArgs() const {
536   unsigned IIF = getIdentifierInfoFlag();
537   if (IIF <= ZeroArg)
538     return 0;
539   if (IIF == OneArg)
540     return 1;
541   // We point to a MultiKeywordSelector.
542   MultiKeywordSelector *SI = getMultiKeywordSelector();
543   return SI->getNumArgs();
544 }
545 
546 const IdentifierInfo *
547 Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
548   if (getIdentifierInfoFlag() < MultiArg) {
549     assert(argIndex == 0 && "illegal keyword index");
550     return getAsIdentifierInfo();
551   }
552 
553   // We point to a MultiKeywordSelector.
554   MultiKeywordSelector *SI = getMultiKeywordSelector();
555   return SI->getIdentifierInfoForSlot(argIndex);
556 }
557 
558 StringRef Selector::getNameForSlot(unsigned int argIndex) const {
559   const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
560   return II ? II->getName() : StringRef();
561 }
562 
563 std::string MultiKeywordSelector::getName() const {
564   SmallString<256> Str;
565   llvm::raw_svector_ostream OS(Str);
566   for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
567     if (*I)
568       OS << (*I)->getName();
569     OS << ':';
570   }
571 
572   return std::string(OS.str());
573 }
574 
575 std::string Selector::getAsString() const {
576   if (isNull())
577     return "<null selector>";
578 
579   if (getIdentifierInfoFlag() < MultiArg) {
580     const IdentifierInfo *II = getAsIdentifierInfo();
581 
582     if (getNumArgs() == 0) {
583       assert(II && "If the number of arguments is 0 then II is guaranteed to "
584                    "not be null.");
585       return std::string(II->getName());
586     }
587 
588     if (!II)
589       return ":";
590 
591     return II->getName().str() + ":";
592   }
593 
594   // We have a multiple keyword selector.
595   return getMultiKeywordSelector()->getName();
596 }
597 
/// Writes the result of getAsString() to \p OS.
void Selector::print(llvm::raw_ostream &OS) const {
  OS << getAsString();
}
601 
/// Debugging aid: prints this selector to llvm::errs().
LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
603 
604 /// Interpreting the given string using the normal CamelCase
605 /// conventions, determine whether the given string starts with the
606 /// given "word", which is assumed to end in a lowercase letter.
607 static bool startsWithWord(StringRef name, StringRef word) {
608   if (name.size() < word.size()) return false;
609   return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
610           name.starts_with(word));
611 }
612 
613 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
614   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
615   if (!first) return OMF_None;
616 
617   StringRef name = first->getName();
618   if (sel.isUnarySelector()) {
619     if (name == "autorelease") return OMF_autorelease;
620     if (name == "dealloc") return OMF_dealloc;
621     if (name == "finalize") return OMF_finalize;
622     if (name == "release") return OMF_release;
623     if (name == "retain") return OMF_retain;
624     if (name == "retainCount") return OMF_retainCount;
625     if (name == "self") return OMF_self;
626     if (name == "initialize") return OMF_initialize;
627   }
628 
629   if (name == "performSelector" || name == "performSelectorInBackground" ||
630       name == "performSelectorOnMainThread")
631     return OMF_performSelector;
632 
633   // The other method families may begin with a prefix of underscores.
634   name = name.ltrim('_');
635 
636   if (name.empty()) return OMF_None;
637   switch (name.front()) {
638   case 'a':
639     if (startsWithWord(name, "alloc")) return OMF_alloc;
640     break;
641   case 'c':
642     if (startsWithWord(name, "copy")) return OMF_copy;
643     break;
644   case 'i':
645     if (startsWithWord(name, "init")) return OMF_init;
646     break;
647   case 'm':
648     if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
649     break;
650   case 'n':
651     if (startsWithWord(name, "new")) return OMF_new;
652     break;
653   default:
654     break;
655   }
656 
657   return OMF_None;
658 }
659 
660 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
661   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
662   if (!first) return OIT_None;
663 
664   StringRef name = first->getName();
665 
666   if (name.empty()) return OIT_None;
667   switch (name.front()) {
668     case 'a':
669       if (startsWithWord(name, "array")) return OIT_Array;
670       break;
671     case 'd':
672       if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
673       if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
674       break;
675     case 's':
676       if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
677       if (startsWithWord(name, "standard")) return OIT_Singleton;
678       break;
679     case 'i':
680       if (startsWithWord(name, "init")) return OIT_Init;
681       break;
682     default:
683       break;
684   }
685   return OIT_None;
686 }
687 
688 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
689   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
690   if (!first) return SFF_None;
691 
692   StringRef name = first->getName();
693 
694   switch (name.front()) {
695     case 'a':
696       if (name == "appendFormat") return SFF_NSString;
697       break;
698 
699     case 'i':
700       if (name == "initWithFormat") return SFF_NSString;
701       break;
702 
703     case 'l':
704       if (name == "localizedStringWithFormat") return SFF_NSString;
705       break;
706 
707     case 's':
708       if (name == "stringByAppendingFormat" ||
709           name == "stringWithFormat") return SFF_NSString;
710       break;
711   }
712   return SFF_None;
713 }
714 
namespace {

/// The state behind SelectorTable's opaque Impl pointer.
struct SelectorTableImpl {
  /// Uniquing set of the multi-keyword selectors created so far.
  llvm::FoldingSet<MultiKeywordSelector> Table;
  /// Allocator that provides the storage for those selectors.
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
723 
/// Casts the opaque pointer \p P (SelectorTable::Impl) back to the
/// SelectorTableImpl it points to. \p P is dereferenced unconditionally.
static SelectorTableImpl &getSelectorTableImpl(void *P) {
  return *static_cast<SelectorTableImpl*>(P);
}
727 
728 SmallString<64>
729 SelectorTable::constructSetterName(StringRef Name) {
730   SmallString<64> SetterName("set");
731   SetterName += Name;
732   SetterName[3] = toUppercase(SetterName[3]);
733   return SetterName;
734 }
735 
736 Selector
737 SelectorTable::constructSetterSelector(IdentifierTable &Idents,
738                                        SelectorTable &SelTable,
739                                        const IdentifierInfo *Name) {
740   IdentifierInfo *SetterName =
741     &Idents.get(constructSetterName(Name->getName()));
742   return SelTable.getUnarySelector(SetterName);
743 }
744 
745 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
746   StringRef Name = Sel.getNameForSlot(0);
747   assert(Name.starts_with("set") && "invalid setter name");
748   return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
749 }
750 
751 size_t SelectorTable::getTotalMemory() const {
752   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
753   return SelTabImpl.Allocator.getTotalMemory();
754 }
755 
756 Selector SelectorTable::getSelector(unsigned nKeys,
757                                     const IdentifierInfo **IIV) {
758   if (nKeys < 2)
759     return Selector(IIV[0], nKeys);
760 
761   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
762 
763   // Unique selector, to guarantee there is one per name.
764   llvm::FoldingSetNodeID ID;
765   MultiKeywordSelector::Profile(ID, IIV, nKeys);
766 
767   void *InsertPos = nullptr;
768   if (MultiKeywordSelector *SI =
769         SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
770     return Selector(SI);
771 
772   // MultiKeywordSelector objects are not allocated with new because they have a
773   // variable size array (for parameter types) at the end of them.
774   unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
775   MultiKeywordSelector *SI =
776       (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
777           Size, alignof(MultiKeywordSelector));
778   new (SI) MultiKeywordSelector(nKeys, IIV);
779   SelTabImpl.Table.InsertNode(SI, InsertPos);
780   return Selector(SI);
781 }
782 
/// Allocates the SelectorTableImpl behind the opaque Impl pointer.
SelectorTable::SelectorTable() {
  Impl = new SelectorTableImpl();
}
786 
/// Deletes the SelectorTableImpl that the constructor allocated.
SelectorTable::~SelectorTable() {
  delete &getSelectorTableImpl(Impl);
}
790 
/// Returns the spelling of the overloaded operator \p Operator, or nullptr
/// for OO_None and NUM_OVERLOADED_OPERATORS.
///
/// The remaining cases are generated from OperatorKinds.def: each
/// OVERLOADED_OPERATOR entry returns its Spelling for OO_<Name>.
const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
  switch (Operator) {
  case OO_None:
  case NUM_OVERLOADED_OPERATORS:
    return nullptr;

#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
  case OO_##Name: return Spelling;
#include "clang/Basic/OperatorKinds.def"
  }

  // Only reached for a value that matches none of the cases above.
  llvm_unreachable("Invalid OverloadedOperatorKind!");
}
804 
805 StringRef clang::getNullabilitySpelling(NullabilityKind kind,
806                                         bool isContextSensitive) {
807   switch (kind) {
808   case NullabilityKind::NonNull:
809     return isContextSensitive ? "nonnull" : "_Nonnull";
810 
811   case NullabilityKind::Nullable:
812     return isContextSensitive ? "nullable" : "_Nullable";
813 
814   case NullabilityKind::NullableResult:
815     assert(!isContextSensitive &&
816            "_Nullable_result isn't supported as context-sensitive keyword");
817     return "_Nullable_result";
818 
819   case NullabilityKind::Unspecified:
820     return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
821   }
822   llvm_unreachable("Unknown nullability kind.");
823 }
824 
825 llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
826                                      NullabilityKind NK) {
827   switch (NK) {
828   case NullabilityKind::NonNull:
829     return OS << "NonNull";
830   case NullabilityKind::Nullable:
831     return OS << "Nullable";
832   case NullabilityKind::NullableResult:
833     return OS << "NullableResult";
834   case NullabilityKind::Unspecified:
835     return OS << "Unspecified";
836   }
837   llvm_unreachable("Unknown nullability kind.");
838 }
839 
/// Returns the compatibility warning to emit for the future-compat keyword
/// \p II, chosen from the TokenKey flags of its TokenKinds.def entry and from
/// whether \p LangOpts is a C++ mode.
///
/// \p II must be a future-compat keyword.
diag::kind
IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
                                         const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // Look up the flags by spelling: one .Case per KEYWORD entry.
  // NOTE(review): the StringSwitch has no .Default(), so this relies on every
  // future-compat keyword having a KEYWORD entry -- confirm.
  unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
      ;

  if (LangOpts.CPlusPlus) {
    // C++11 is tested first, so a keyword carrying both the C++11 and the
    // C++20 flag gets the C++11 warning.
    if ((Flags & KEYCXX11) == KEYCXX11)
      return diag::warn_cxx11_keyword;

    // char8_t is not modeled as a CXX20_KEYWORD because it's not
    // unconditionally enabled in C++20 mode. (It can be disabled
    // by -fno-char8_t.)
    if (((Flags & KEYCXX20) == KEYCXX20) ||
        ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
      return diag::warn_cxx20_keyword;
  } else {
    // C99 is tested before C23, with the same precedence effect.
    if ((Flags & KEYC99) == KEYC99)
      return diag::warn_c99_keyword;
    if ((Flags & KEYC23) == KEYC23)
      return diag::warn_c23_keyword;
  }

  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
871