xref: /llvm-project/clang/lib/Basic/IdentifierTable.cpp (revision c8554e13eec048180d003af2aa7b2cc8498d4fba)
1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file implements the IdentifierInfo, IdentifierTable, Selector, and
// SelectorTable interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/IdentifierTable.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/DiagnosticLex.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/OperatorKinds.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/DenseMapInfo.h"
23 #include "llvm/ADT/FoldingSet.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 #include <cstdio>
31 #include <cstring>
32 #include <string>
33 
34 using namespace clang;
35 
// A check to make sure the interesting-identifier ID field has sufficient room
// to store the largest possible target/aux-target builtin combination. If we
// exceed this, we likely need to just increase the InterestingIdentifierBits
// value in IdentifierTable.h.
39 static_assert(2 * LargestBuiltinID < (2 << (InterestingIdentifierBits - 1)),
40               "Insufficient ObjCOrBuiltinID Bits");
41 
42 //===----------------------------------------------------------------------===//
43 // IdentifierTable Implementation
44 //===----------------------------------------------------------------------===//
45 
// Out-of-line (defaulted) definition of the IdentifierIterator destructor.
IdentifierIterator::~IdentifierIterator() = default;

// Out-of-line (defaulted) definition of the IdentifierInfoLookup destructor.
IdentifierInfoLookup::~IdentifierInfoLookup() = default;
49 
50 namespace {
51 
52 /// A simple identifier lookup iterator that represents an
53 /// empty sequence of identifiers.
54 class EmptyLookupIterator : public IdentifierIterator {
55 public:
56   StringRef Next() override { return StringRef(); }
57 };
58 
59 } // namespace
60 
61 IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
62   return new EmptyLookupIterator();
63 }
64 
/// Constructs an identifier table without registering any keywords.
/// \p ExternalLookup is stored as given (it may be whatever the caller
/// passes; no check is performed here).
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Start with space for 8K identifiers.
      ExternalLookup(ExternalLookup) {}
68 
/// Constructs an identifier table and immediately registers the keywords that
/// apply to \p LangOpts. Delegates to the single-argument constructor for the
/// basic setup.
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  // Populate the identifier table with info about keywords for the current
  // language.
  AddKeywords(LangOpts);
}
76 
77 //===----------------------------------------------------------------------===//
78 // Language Keyword Implementation
79 //===----------------------------------------------------------------------===//
80 
// Flag constants referenced by the entries in TokenKinds.def.
namespace {

/// One bit per keyword category. Each enumerator below (up to KEYMAX) occupies
/// exactly one bit so that flags can be OR-ed together and peeled off one at
/// a time.
enum TokenKey : unsigned {
  KEYC99 = 1u << 0,
  KEYCXX = 1u << 1,
  KEYCXX11 = 1u << 2,
  KEYGNU = 1u << 3,
  KEYMS = 1u << 4,
  BOOLSUPPORT = 1u << 5,
  KEYALTIVEC = 1u << 6,
  KEYNOCXX = 1u << 7,
  KEYBORLAND = 1u << 8,
  KEYOPENCLC = 1u << 9,
  KEYC23 = 1u << 10,
  KEYNOMS18 = 1u << 11,
  KEYNOOPENCL = 1u << 12,
  WCHARSUPPORT = 1u << 13,
  HALFSUPPORT = 1u << 14,
  CHAR8SUPPORT = 1u << 15,
  KEYOBJC = 1u << 16,
  KEYZVECTOR = 1u << 17,
  KEYCOROUTINES = 1u << 18,
  KEYMODULES = 1u << 19,
  KEYCXX20 = 1u << 20,
  KEYOPENCLCXX = 1u << 21,
  KEYMSCOMPAT = 1u << 22,
  KEYSYCL = 1u << 23,
  KEYCUDA = 1u << 24,
  KEYZOS = 1u << 25,
  KEYNOZOS = 1u << 26,
  KEYHLSL = 1u << 27,
  KEYFIXEDPOINT = 1u << 28,

  // The highest single-bit key.
  KEYMAX = KEYFIXEDPOINT,

  // Every C++ standard-level key.
  KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,

  // Every bit up to and including KEYMAX, minus the three "disable" keys
  // KEYNOMS18, KEYNOOPENCL and KEYNOZOS.
  KEYALL = (KEYMAX | (KEYMAX - 1)) & ~(KEYNOMS18 | KEYNOOPENCL | KEYNOZOS)
};

/// How a keyword is treated in the selected standard. The enumerators are
/// deliberately ordered from least to most permissive, so that taking the
/// maximum of two statuses picks the one that 'wins'.
enum KeywordStatus {
  KS_Unknown,   // Not yet calculated. Used when figuring out the status.
  KS_Disabled,  // Disabled
  KS_Future,    // Is a keyword in future standard
  KS_Extension, // Is an extension
  KS_Enabled,   // Enabled
};

} // namespace
131 
132 // This works on a single TokenKey flag and checks the LangOpts to get the
133 // KeywordStatus based exclusively on this flag, so that it can be merged in
134 // getKeywordStatus. Most should be enabled/disabled, but some might imply
135 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
136 // be disabled, and the calling function makes it 'disabled' if no other flag
137 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
138 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
139                                             TokenKey Flag) {
140   // Flag is a single bit version of TokenKey (that is, not
141   // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
142   assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
143 
144   switch (Flag) {
145   case KEYC99:
146     if (LangOpts.C99)
147       return KS_Enabled;
148     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
149   case KEYC23:
150     if (LangOpts.C23)
151       return KS_Enabled;
152     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
153   case KEYCXX:
154     return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
155   case KEYCXX11:
156     if (LangOpts.CPlusPlus11)
157       return KS_Enabled;
158     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
159   case KEYCXX20:
160     if (LangOpts.CPlusPlus20)
161       return KS_Enabled;
162     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
163   case KEYGNU:
164     return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
165   case KEYMS:
166     return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
167   case BOOLSUPPORT:
168     if (LangOpts.Bool)      return KS_Enabled;
169     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
170   case KEYALTIVEC:
171     return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
172   case KEYBORLAND:
173     return LangOpts.Borland ? KS_Extension : KS_Unknown;
174   case KEYOPENCLC:
175     return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
176                                                         : KS_Unknown;
177   case WCHARSUPPORT:
178     return LangOpts.WChar ? KS_Enabled : KS_Unknown;
179   case HALFSUPPORT:
180     return LangOpts.Half ? KS_Enabled : KS_Unknown;
181   case CHAR8SUPPORT:
182     if (LangOpts.Char8) return KS_Enabled;
183     if (LangOpts.CPlusPlus20) return KS_Unknown;
184     if (LangOpts.CPlusPlus) return KS_Future;
185     return KS_Unknown;
186   case KEYOBJC:
187     // We treat bridge casts as objective-C keywords so we can warn on them
188     // in non-arc mode.
189     return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
190   case KEYZVECTOR:
191     return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
192   case KEYCOROUTINES:
193     return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
194   case KEYMODULES:
195     return KS_Unknown;
196   case KEYOPENCLCXX:
197     return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
198   case KEYMSCOMPAT:
199     return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
200   case KEYSYCL:
201     return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
202   case KEYCUDA:
203     return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
204   case KEYZOS:
205     return LangOpts.ZOSExt ? KS_Enabled : KS_Unknown;
206   case KEYHLSL:
207     return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
208   case KEYNOCXX:
209     // This is enabled in all non-C++ modes, but might be enabled for other
210     // reasons as well.
211     return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
212   case KEYNOOPENCL:
213   case KEYNOMS18:
214   case KEYNOZOS:
215     // The disable behavior for this is handled in getKeywordStatus.
216     return KS_Unknown;
217   case KEYFIXEDPOINT:
218     return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
219   default:
220     llvm_unreachable("Unknown KeywordStatus flag");
221   }
222 }
223 
224 /// Translates flags as specified in TokenKinds.def into keyword status
225 /// in the given language standard.
226 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
227                                       unsigned Flags) {
228   // KEYALL means always enabled, so special case this one.
229   if (Flags == KEYALL) return KS_Enabled;
230   // These are tests that need to 'always win', as they are special in that they
231   // disable based on certain conditions.
232   if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
233   if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
234       !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
235     return KS_Disabled;
236   if (LangOpts.ZOSExt && (Flags & KEYNOZOS))
237     return KS_Disabled;
238   KeywordStatus CurStatus = KS_Unknown;
239 
240   while (Flags != 0) {
241     unsigned CurFlag = Flags & ~(Flags - 1);
242     Flags = Flags & ~CurFlag;
243     CurStatus = std::max(
244         CurStatus,
245         getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
246   }
247 
248   if (CurStatus == KS_Unknown)
249     return KS_Disabled;
250   return CurStatus;
251 }
252 
253 /// AddKeyword - This method is used to associate a token ID with specific
254 /// identifiers because they are language keywords.  This causes the lexer to
255 /// automatically map matching identifiers to specialized token codes.
256 static void AddKeyword(StringRef Keyword,
257                        tok::TokenKind TokenCode, unsigned Flags,
258                        const LangOptions &LangOpts, IdentifierTable &Table) {
259   KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
260 
261   // Don't add this keyword if disabled in this language.
262   if (AddResult == KS_Disabled) return;
263 
264   IdentifierInfo &Info =
265       Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
266   Info.setIsExtensionToken(AddResult == KS_Extension);
267   Info.setIsFutureCompatKeyword(AddResult == KS_Future);
268 }
269 
270 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
271 /// representations.
272 static void AddCXXOperatorKeyword(StringRef Keyword,
273                                   tok::TokenKind TokenCode,
274                                   IdentifierTable &Table) {
275   IdentifierInfo &Info = Table.get(Keyword, TokenCode);
276   Info.setIsCPlusPlusOperatorKeyword();
277 }
278 
279 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
280 /// or "property".
281 static void AddObjCKeyword(StringRef Name,
282                            tok::ObjCKeywordKind ObjCID,
283                            IdentifierTable &Table) {
284   Table.get(Name).setObjCKeywordID(ObjCID);
285 }
286 
287 static void AddNotableIdentifier(StringRef Name,
288                                  tok::NotableIdentifierKind BTID,
289                                  IdentifierTable &Table) {
290   // Don't add 'not_notable' identifier.
291   if (BTID != tok::not_notable) {
292     IdentifierInfo &Info = Table.get(Name, tok::identifier);
293     Info.setNotableIdentifierID(BTID);
294   }
295 }
296 
297 /// AddKeywords - Add all keywords to the symbol table.
298 ///
299 void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
300   // Add keywords and tokens for the current language.
301 #define KEYWORD(NAME, FLAGS) \
302   AddKeyword(StringRef(#NAME), tok::kw_ ## NAME,  \
303              FLAGS, LangOpts, *this);
304 #define ALIAS(NAME, TOK, FLAGS) \
305   AddKeyword(StringRef(NAME), tok::kw_ ## TOK,  \
306              FLAGS, LangOpts, *this);
307 #define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
308   if (LangOpts.CXXOperatorNames)          \
309     AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);
310 #define OBJC_AT_KEYWORD(NAME)  \
311   if (LangOpts.ObjC)           \
312     AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
313 #define NOTABLE_IDENTIFIER(NAME)                                               \
314   AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this);
315 
316 #define TESTING_KEYWORD(NAME, FLAGS)
317 #include "clang/Basic/TokenKinds.def"
318 
319   if (LangOpts.ParseUnknownAnytype)
320     AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
321                LangOpts, *this);
322 
323   if (LangOpts.DeclSpecKeyword)
324     AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);
325 
326   if (LangOpts.IEEE128)
327     AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
328 
329   // Add the 'import' contextual keyword.
330   get("import").setModulesImport(true);
331 }
332 
/// Checks if the specified token kind represents a keyword in the
/// specified language.
///
/// The switch is generated from the KEYWORD entries of TokenKinds.def: each
/// keyword token kind evaluates its own flag set via getKeywordStatus. Token
/// kinds without a KEYWORD entry are reported as KS_Disabled.
/// \returns Status of the keyword in the language.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
                                      tok::TokenKind K) {
  switch (K) {
  // One case per KEYWORD(NAME, FLAGS) entry.
#define KEYWORD(NAME, FLAGS) \
  case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
  // Everything else is not a keyword token.
  default: return KS_Disabled;
  }
}
345 
346 /// Returns true if the identifier represents a keyword in the
347 /// specified language.
348 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
349   switch (getTokenKwStatus(LangOpts, getTokenID())) {
350   case KS_Enabled:
351   case KS_Extension:
352     return true;
353   default:
354     return false;
355   }
356 }
357 
358 /// Returns true if the identifier represents a C++ keyword in the
359 /// specified language.
360 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
361   if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
362     return false;
363   // This is a C++ keyword if this identifier is not a keyword when checked
364   // using LangOptions without C++ support.
365   LangOptions LangOptsNoCPP = LangOpts;
366   LangOptsNoCPP.CPlusPlus = false;
367   LangOptsNoCPP.CPlusPlus11 = false;
368   LangOptsNoCPP.CPlusPlus20 = false;
369   return !isKeyword(LangOptsNoCPP);
370 }
371 
372 ReservedIdentifierStatus
373 IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
374   StringRef Name = getName();
375 
376   // '_' is a reserved identifier, but its use is so common (e.g. to store
377   // ignored values) that we don't warn on it.
378   if (Name.size() <= 1)
379     return ReservedIdentifierStatus::NotReserved;
380 
381   // [lex.name] p3
382   if (Name[0] == '_') {
383 
384     // Each name that begins with an underscore followed by an uppercase letter
385     // or another underscore is reserved.
386     if (Name[1] == '_')
387       return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
388 
389     if ('A' <= Name[1] && Name[1] <= 'Z')
390       return ReservedIdentifierStatus::
391           StartsWithUnderscoreFollowedByCapitalLetter;
392 
393     // This is a bit misleading: it actually means it's only reserved if we're
394     // at global scope because it starts with an underscore.
395     return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
396   }
397 
398   // Each name that contains a double underscore (__) is reserved.
399   if (LangOpts.CPlusPlus && Name.contains("__"))
400     return ReservedIdentifierStatus::ContainsDoubleUnderscore;
401 
402   return ReservedIdentifierStatus::NotReserved;
403 }
404 
405 ReservedLiteralSuffixIdStatus
406 IdentifierInfo::isReservedLiteralSuffixId() const {
407   StringRef Name = getName();
408 
409   // Note: the diag::warn_deprecated_literal_operator_id diagnostic depends on
410   // this being the first check we do, so if this order changes, we have to fix
411   // that as well.
412   if (Name[0] != '_')
413     return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
414 
415   if (Name.contains("__"))
416     return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
417 
418   return ReservedLiteralSuffixIdStatus::NotReserved;
419 }
420 
421 StringRef IdentifierInfo::deuglifiedName() const {
422   StringRef Name = getName();
423   if (Name.size() >= 2 && Name.front() == '_' &&
424       (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
425     return Name.ltrim('_');
426   return Name;
427 }
428 
429 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
430   // We use a perfect hash function here involving the length of the keyword,
431   // the first and third character.  For preprocessor ID's there are no
432   // collisions (if there were, the switch below would complain about duplicate
433   // case values).  Note that this depends on 'if' being null terminated.
434 
435 #define HASH(LEN, FIRST, THIRD)                                                \
436   (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
437 #define CASE(LEN, FIRST, THIRD, NAME) \
438   case HASH(LEN, FIRST, THIRD): \
439     return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
440 
441   unsigned Len = getLength();
442   if (Len < 2) return tok::pp_not_keyword;
443   const char *Name = getNameStart();
444   switch (HASH(Len, Name[0], Name[2])) {
445   default: return tok::pp_not_keyword;
446   CASE( 2, 'i', '\0', if);
447   CASE( 4, 'e', 'i', elif);
448   CASE( 4, 'e', 's', else);
449   CASE( 4, 'l', 'n', line);
450   CASE( 4, 's', 'c', sccs);
451   CASE( 5, 'e', 'b', embed);
452   CASE( 5, 'e', 'd', endif);
453   CASE( 5, 'e', 'r', error);
454   CASE( 5, 'i', 'e', ident);
455   CASE( 5, 'i', 'd', ifdef);
456   CASE( 5, 'u', 'd', undef);
457 
458   CASE( 6, 'a', 's', assert);
459   CASE( 6, 'd', 'f', define);
460   CASE( 6, 'i', 'n', ifndef);
461   CASE( 6, 'i', 'p', import);
462   CASE( 6, 'p', 'a', pragma);
463 
464   CASE( 7, 'd', 'f', defined);
465   CASE( 7, 'e', 'i', elifdef);
466   CASE( 7, 'i', 'c', include);
467   CASE( 7, 'w', 'r', warning);
468 
469   CASE( 8, 'e', 'i', elifndef);
470   CASE( 8, 'u', 'a', unassert);
471   CASE(12, 'i', 'c', include_next);
472 
473   CASE(14, '_', 'p', __public_macro);
474 
475   CASE(15, '_', 'p', __private_macro);
476 
477   CASE(16, '_', 'i', __include_macros);
478 #undef CASE
479 #undef HASH
480   }
481 }
482 
483 //===----------------------------------------------------------------------===//
484 // Stats Implementation
485 //===----------------------------------------------------------------------===//
486 
487 /// PrintStats - Print statistics about how well the identifier table is doing
488 /// at hashing identifiers.
489 void IdentifierTable::PrintStats() const {
490   unsigned NumBuckets = HashTable.getNumBuckets();
491   unsigned NumIdentifiers = HashTable.getNumItems();
492   unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
493   unsigned AverageIdentifierSize = 0;
494   unsigned MaxIdentifierLength = 0;
495 
496   // TODO: Figure out maximum times an identifier had to probe for -stats.
497   for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
498        I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
499     unsigned IdLen = I->getKeyLength();
500     AverageIdentifierSize += IdLen;
501     if (MaxIdentifierLength < IdLen)
502       MaxIdentifierLength = IdLen;
503   }
504 
505   fprintf(stderr, "\n*** Identifier Table Stats:\n");
506   fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
507   fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
508   fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
509           NumIdentifiers/(double)NumBuckets);
510   fprintf(stderr, "Ave identifier length: %f\n",
511           (AverageIdentifierSize/(double)NumIdentifiers));
512   fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
513 
514   // Compute statistics about the memory allocated for identifiers.
515   HashTable.getAllocator().PrintStats();
516 }
517 
518 //===----------------------------------------------------------------------===//
519 // SelectorTable Implementation
520 //===----------------------------------------------------------------------===//
521 
522 unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
523   return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
524 }
525 
526 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
527   assert(!Names.empty() && "must have >= 1 selector slots");
528   if (getNumArgs() != Names.size())
529     return false;
530   for (unsigned I = 0, E = Names.size(); I != E; ++I) {
531     if (getNameForSlot(I) != Names[I])
532       return false;
533   }
534   return true;
535 }
536 
537 bool Selector::isUnarySelector(StringRef Name) const {
538   return isUnarySelector() && getNameForSlot(0) == Name;
539 }
540 
541 unsigned Selector::getNumArgs() const {
542   unsigned IIF = getIdentifierInfoFlag();
543   if (IIF <= ZeroArg)
544     return 0;
545   if (IIF == OneArg)
546     return 1;
547   // We point to a MultiKeywordSelector.
548   MultiKeywordSelector *SI = getMultiKeywordSelector();
549   return SI->getNumArgs();
550 }
551 
552 const IdentifierInfo *
553 Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
554   if (getIdentifierInfoFlag() < MultiArg) {
555     assert(argIndex == 0 && "illegal keyword index");
556     return getAsIdentifierInfo();
557   }
558 
559   // We point to a MultiKeywordSelector.
560   MultiKeywordSelector *SI = getMultiKeywordSelector();
561   return SI->getIdentifierInfoForSlot(argIndex);
562 }
563 
564 StringRef Selector::getNameForSlot(unsigned int argIndex) const {
565   const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
566   return II ? II->getName() : StringRef();
567 }
568 
569 std::string MultiKeywordSelector::getName() const {
570   SmallString<256> Str;
571   llvm::raw_svector_ostream OS(Str);
572   for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
573     if (*I)
574       OS << (*I)->getName();
575     OS << ':';
576   }
577 
578   return std::string(OS.str());
579 }
580 
581 std::string Selector::getAsString() const {
582   if (isNull())
583     return "<null selector>";
584 
585   if (getIdentifierInfoFlag() < MultiArg) {
586     const IdentifierInfo *II = getAsIdentifierInfo();
587 
588     if (getNumArgs() == 0) {
589       assert(II && "If the number of arguments is 0 then II is guaranteed to "
590                    "not be null.");
591       return std::string(II->getName());
592     }
593 
594     if (!II)
595       return ":";
596 
597     return II->getName().str() + ":";
598   }
599 
600   // We have a multiple keyword selector.
601   return getMultiKeywordSelector()->getName();
602 }
603 
604 void Selector::print(llvm::raw_ostream &OS) const {
605   OS << getAsString();
606 }
607 
/// Debugging aid: prints the selector to llvm::errs().
LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
609 
610 /// Interpreting the given string using the normal CamelCase
611 /// conventions, determine whether the given string starts with the
612 /// given "word", which is assumed to end in a lowercase letter.
613 static bool startsWithWord(StringRef name, StringRef word) {
614   if (name.size() < word.size()) return false;
615   return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
616           name.starts_with(word));
617 }
618 
619 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
620   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
621   if (!first) return OMF_None;
622 
623   StringRef name = first->getName();
624   if (sel.isUnarySelector()) {
625     if (name == "autorelease") return OMF_autorelease;
626     if (name == "dealloc") return OMF_dealloc;
627     if (name == "finalize") return OMF_finalize;
628     if (name == "release") return OMF_release;
629     if (name == "retain") return OMF_retain;
630     if (name == "retainCount") return OMF_retainCount;
631     if (name == "self") return OMF_self;
632     if (name == "initialize") return OMF_initialize;
633   }
634 
635   if (name == "performSelector" || name == "performSelectorInBackground" ||
636       name == "performSelectorOnMainThread")
637     return OMF_performSelector;
638 
639   // The other method families may begin with a prefix of underscores.
640   name = name.ltrim('_');
641 
642   if (name.empty()) return OMF_None;
643   switch (name.front()) {
644   case 'a':
645     if (startsWithWord(name, "alloc")) return OMF_alloc;
646     break;
647   case 'c':
648     if (startsWithWord(name, "copy")) return OMF_copy;
649     break;
650   case 'i':
651     if (startsWithWord(name, "init")) return OMF_init;
652     break;
653   case 'm':
654     if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
655     break;
656   case 'n':
657     if (startsWithWord(name, "new")) return OMF_new;
658     break;
659   default:
660     break;
661   }
662 
663   return OMF_None;
664 }
665 
666 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
667   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
668   if (!first) return OIT_None;
669 
670   StringRef name = first->getName();
671 
672   if (name.empty()) return OIT_None;
673   switch (name.front()) {
674     case 'a':
675       if (startsWithWord(name, "array")) return OIT_Array;
676       break;
677     case 'd':
678       if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
679       if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
680       break;
681     case 's':
682       if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
683       if (startsWithWord(name, "standard")) return OIT_Singleton;
684       break;
685     case 'i':
686       if (startsWithWord(name, "init")) return OIT_Init;
687       break;
688     default:
689       break;
690   }
691   return OIT_None;
692 }
693 
694 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
695   const IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
696   if (!first) return SFF_None;
697 
698   StringRef name = first->getName();
699 
700   switch (name.front()) {
701     case 'a':
702       if (name == "appendFormat") return SFF_NSString;
703       break;
704 
705     case 'i':
706       if (name == "initWithFormat") return SFF_NSString;
707       break;
708 
709     case 'l':
710       if (name == "localizedStringWithFormat") return SFF_NSString;
711       break;
712 
713     case 's':
714       if (name == "stringByAppendingFormat" ||
715           name == "stringWithFormat") return SFF_NSString;
716       break;
717   }
718   return SFF_None;
719 }
720 
namespace {

/// Private state behind SelectorTable's opaque Impl pointer.
struct SelectorTableImpl {
  /// Uniquing set for multi-keyword selectors.
  llvm::FoldingSet<MultiKeywordSelector> Table;
  /// Allocator that provides the storage for the MultiKeywordSelector objects.
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
729 
730 static SelectorTableImpl &getSelectorTableImpl(void *P) {
731   return *static_cast<SelectorTableImpl*>(P);
732 }
733 
734 SmallString<64>
735 SelectorTable::constructSetterName(StringRef Name) {
736   SmallString<64> SetterName("set");
737   SetterName += Name;
738   SetterName[3] = toUppercase(SetterName[3]);
739   return SetterName;
740 }
741 
742 Selector
743 SelectorTable::constructSetterSelector(IdentifierTable &Idents,
744                                        SelectorTable &SelTable,
745                                        const IdentifierInfo *Name) {
746   IdentifierInfo *SetterName =
747     &Idents.get(constructSetterName(Name->getName()));
748   return SelTable.getUnarySelector(SetterName);
749 }
750 
751 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
752   StringRef Name = Sel.getNameForSlot(0);
753   assert(Name.starts_with("set") && "invalid setter name");
754   return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
755 }
756 
757 size_t SelectorTable::getTotalMemory() const {
758   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
759   return SelTabImpl.Allocator.getTotalMemory();
760 }
761 
762 Selector SelectorTable::getSelector(unsigned nKeys,
763                                     const IdentifierInfo **IIV) {
764   if (nKeys < 2)
765     return Selector(IIV[0], nKeys);
766 
767   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
768 
769   // Unique selector, to guarantee there is one per name.
770   llvm::FoldingSetNodeID ID;
771   MultiKeywordSelector::Profile(ID, IIV, nKeys);
772 
773   void *InsertPos = nullptr;
774   if (MultiKeywordSelector *SI =
775         SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
776     return Selector(SI);
777 
778   // MultiKeywordSelector objects are not allocated with new because they have a
779   // variable size array (for parameter types) at the end of them.
780   unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
781   MultiKeywordSelector *SI =
782       (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
783           Size, alignof(MultiKeywordSelector));
784   new (SI) MultiKeywordSelector(nKeys, IIV);
785   SelTabImpl.Table.InsertNode(SI, InsertPos);
786   return Selector(SI);
787 }
788 
789 SelectorTable::SelectorTable() {
790   Impl = new SelectorTableImpl();
791 }
792 
793 SelectorTable::~SelectorTable() {
794   delete &getSelectorTableImpl(Impl);
795 }
796 
797 const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
798   switch (Operator) {
799   case OO_None:
800   case NUM_OVERLOADED_OPERATORS:
801     return nullptr;
802 
803 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
804   case OO_##Name: return Spelling;
805 #include "clang/Basic/OperatorKinds.def"
806   }
807 
808   llvm_unreachable("Invalid OverloadedOperatorKind!");
809 }
810 
811 StringRef clang::getNullabilitySpelling(NullabilityKind kind,
812                                         bool isContextSensitive) {
813   switch (kind) {
814   case NullabilityKind::NonNull:
815     return isContextSensitive ? "nonnull" : "_Nonnull";
816 
817   case NullabilityKind::Nullable:
818     return isContextSensitive ? "nullable" : "_Nullable";
819 
820   case NullabilityKind::NullableResult:
821     assert(!isContextSensitive &&
822            "_Nullable_result isn't supported as context-sensitive keyword");
823     return "_Nullable_result";
824 
825   case NullabilityKind::Unspecified:
826     return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
827   }
828   llvm_unreachable("Unknown nullability kind.");
829 }
830 
831 llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
832                                      NullabilityKind NK) {
833   switch (NK) {
834   case NullabilityKind::NonNull:
835     return OS << "NonNull";
836   case NullabilityKind::Nullable:
837     return OS << "Nullable";
838   case NullabilityKind::NullableResult:
839     return OS << "NullableResult";
840   case NullabilityKind::Unspecified:
841     return OS << "Unspecified";
842   }
843   llvm_unreachable("Unknown nullability kind.");
844 }
845 
846 diag::kind
847 IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
848                                          const LangOptions &LangOpts) {
849   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
850 
851   unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
852 #define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
853 #include "clang/Basic/TokenKinds.def"
854 #undef KEYWORD
855       ;
856 
857   if (LangOpts.CPlusPlus) {
858     if ((Flags & KEYCXX11) == KEYCXX11)
859       return diag::warn_cxx11_keyword;
860 
861     // char8_t is not modeled as a CXX20_KEYWORD because it's not
862     // unconditionally enabled in C++20 mode. (It can be disabled
863     // by -fno-char8_t.)
864     if (((Flags & KEYCXX20) == KEYCXX20) ||
865         ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
866       return diag::warn_cxx20_keyword;
867   } else {
868     if ((Flags & KEYC99) == KEYC99)
869       return diag::warn_c99_keyword;
870     if ((Flags & KEYC23) == KEYC23)
871       return diag::warn_c23_keyword;
872   }
873 
874   llvm_unreachable(
875       "Keyword not known to come from a newer Standard or proposed Standard");
876 }
877