xref: /llvm-project/clang/lib/Basic/IdentifierTable.cpp (revision 2005f484f6c021318848cffda2c3d97c58615bb5)
1 //===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdentifierInfo, IdentifierVisitor, and
10 // IdentifierTable interfaces.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/IdentifierTable.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/DiagnosticLex.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/OperatorKinds.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Basic/TargetBuiltins.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/DenseMapInfo.h"
23 #include "llvm/ADT/FoldingSet.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 #include <cstdio>
31 #include <cstring>
32 #include <string>
33 
34 using namespace clang;
35 
// Compile-time check that ObjCOrBuiltinID has sufficient room to store the
// largest possible target/aux-target combination. If we exceed this, we likely
// need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h.
//
// `2 << (ObjCOrBuiltinIDBits - 1)` equals 2^ObjCOrBuiltinIDBits, i.e. the
// number of distinct values the field can hold; twice the largest builtin ID
// must stay strictly below it.
static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)),
              "Insufficient ObjCOrBuiltinID Bits");
41 
42 //===----------------------------------------------------------------------===//
43 // IdentifierTable Implementation
44 //===----------------------------------------------------------------------===//
45 
// Out-of-line defaulted destructor for the identifier iterator interface.
IdentifierIterator::~IdentifierIterator() = default;

// Out-of-line defaulted destructor for the external lookup interface.
IdentifierInfoLookup::~IdentifierInfoLookup() = default;
49 
50 namespace {
51 
52 /// A simple identifier lookup iterator that represents an
53 /// empty sequence of identifiers.
54 class EmptyLookupIterator : public IdentifierIterator {
55 public:
56   StringRef Next() override { return StringRef(); }
57 };
58 
59 } // namespace
60 
61 IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
62   return new EmptyLookupIterator();
63 }
64 
65 IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
66     : HashTable(8192), // Start with space for 8K identifiers.
67       ExternalLookup(ExternalLookup) {}
68 
69 IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
70                                  IdentifierInfoLookup *ExternalLookup)
71     : IdentifierTable(ExternalLookup) {
72   // Populate the identifier table with info about keywords for the current
73   // language.
74   AddKeywords(LangOpts);
75 }
76 
77 //===----------------------------------------------------------------------===//
78 // Language Keyword Implementation
79 //===----------------------------------------------------------------------===//
80 
// Flag constants referenced by the keyword entries in TokenKinds.def.
namespace {

  /// One bit per keyword category. A keyword's FLAGS value is an OR of these.
  enum TokenKey : unsigned {
    KEYC99        = 1u << 0,
    KEYCXX        = 1u << 1,
    KEYCXX11      = 1u << 2,
    KEYGNU        = 1u << 3,
    KEYMS         = 1u << 4,
    BOOLSUPPORT   = 1u << 5,
    KEYALTIVEC    = 1u << 6,
    KEYNOCXX      = 1u << 7,
    KEYBORLAND    = 1u << 8,
    KEYOPENCLC    = 1u << 9,
    KEYC23        = 1u << 10,
    KEYNOMS18     = 1u << 11,
    KEYNOOPENCL   = 1u << 12,
    WCHARSUPPORT  = 1u << 13,
    HALFSUPPORT   = 1u << 14,
    CHAR8SUPPORT  = 1u << 15,
    KEYOBJC       = 1u << 16,
    KEYZVECTOR    = 1u << 17,
    KEYCOROUTINES = 1u << 18,
    KEYMODULES    = 1u << 19,
    KEYCXX20      = 1u << 20,
    KEYOPENCLCXX  = 1u << 21,
    KEYMSCOMPAT   = 1u << 22,
    KEYSYCL       = 1u << 23,
    KEYCUDA       = 1u << 24,
    KEYHLSL       = 1u << 25,

    // The highest single-bit key; must be updated when a key is appended.
    KEYMAX        = KEYHLSL,

    // Every C++ dialect key.
    KEYALLCXX     = KEYCXX | KEYCXX11 | KEYCXX20,

    // Every bit up to and including KEYMAX, minus the two keys that exist
    // only to exclude a keyword (KEYNOMS18 and KEYNOOPENCL).
    KEYALL        = (KEYMAX | (KEYMAX - 1)) & ~(KEYNOMS18 | KEYNOOPENCL)
  };

  /// How a keyword is treated in the selected standard. The enumerators are
  /// deliberately ordered from least to most permissive, so that taking the
  /// maximum of several statuses picks the one that 'wins'.
  enum KeywordStatus {
    KS_Unknown   = 0, // Not yet calculated; used while merging flag results.
    KS_Disabled  = 1, // Not a keyword in this mode.
    KS_Future    = 2, // Becomes a keyword in a future standard.
    KS_Extension = 3, // Available as an extension.
    KS_Enabled   = 4, // A keyword in this mode.
  };

} // namespace
128 
129 // This works on a single TokenKey flag and checks the LangOpts to get the
130 // KeywordStatus based exclusively on this flag, so that it can be merged in
131 // getKeywordStatus. Most should be enabled/disabled, but some might imply
132 // 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
133 // be disabled, and the calling function makes it 'disabled' if no other flag
134 // changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
135 static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
136                                             TokenKey Flag) {
137   // Flag is a single bit version of TokenKey (that is, not
138   // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
139   assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
140 
141   switch (Flag) {
142   case KEYC99:
143     if (LangOpts.C99)
144       return KS_Enabled;
145     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
146   case KEYC23:
147     if (LangOpts.C23)
148       return KS_Enabled;
149     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
150   case KEYCXX:
151     return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
152   case KEYCXX11:
153     if (LangOpts.CPlusPlus11)
154       return KS_Enabled;
155     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
156   case KEYCXX20:
157     if (LangOpts.CPlusPlus20)
158       return KS_Enabled;
159     return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
160   case KEYGNU:
161     return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
162   case KEYMS:
163     return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
164   case BOOLSUPPORT:
165     if (LangOpts.Bool)      return KS_Enabled;
166     return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
167   case KEYALTIVEC:
168     return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
169   case KEYBORLAND:
170     return LangOpts.Borland ? KS_Extension : KS_Unknown;
171   case KEYOPENCLC:
172     return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
173                                                         : KS_Unknown;
174   case WCHARSUPPORT:
175     return LangOpts.WChar ? KS_Enabled : KS_Unknown;
176   case HALFSUPPORT:
177     return LangOpts.Half ? KS_Enabled : KS_Unknown;
178   case CHAR8SUPPORT:
179     if (LangOpts.Char8) return KS_Enabled;
180     if (LangOpts.CPlusPlus20) return KS_Unknown;
181     if (LangOpts.CPlusPlus) return KS_Future;
182     return KS_Unknown;
183   case KEYOBJC:
184     // We treat bridge casts as objective-C keywords so we can warn on them
185     // in non-arc mode.
186     return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
187   case KEYZVECTOR:
188     return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
189   case KEYCOROUTINES:
190     return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
191   case KEYMODULES:
192     return KS_Unknown;
193   case KEYOPENCLCXX:
194     return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
195   case KEYMSCOMPAT:
196     return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
197   case KEYSYCL:
198     return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
199   case KEYCUDA:
200     return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
201   case KEYHLSL:
202     return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
203   case KEYNOCXX:
204     // This is enabled in all non-C++ modes, but might be enabled for other
205     // reasons as well.
206     return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
207   case KEYNOOPENCL:
208     // The disable behavior for this is handled in getKeywordStatus.
209     return KS_Unknown;
210   case KEYNOMS18:
211     // The disable behavior for this is handled in getKeywordStatus.
212     return KS_Unknown;
213   default:
214     llvm_unreachable("Unknown KeywordStatus flag");
215   }
216 }
217 
218 /// Translates flags as specified in TokenKinds.def into keyword status
219 /// in the given language standard.
220 static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
221                                       unsigned Flags) {
222   // KEYALL means always enabled, so special case this one.
223   if (Flags == KEYALL) return KS_Enabled;
224   // These are tests that need to 'always win', as they are special in that they
225   // disable based on certain conditions.
226   if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
227   if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
228       !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
229     return KS_Disabled;
230 
231   KeywordStatus CurStatus = KS_Unknown;
232 
233   while (Flags != 0) {
234     unsigned CurFlag = Flags & ~(Flags - 1);
235     Flags = Flags & ~CurFlag;
236     CurStatus = std::max(
237         CurStatus,
238         getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
239   }
240 
241   if (CurStatus == KS_Unknown)
242     return KS_Disabled;
243   return CurStatus;
244 }
245 
246 /// AddKeyword - This method is used to associate a token ID with specific
247 /// identifiers because they are language keywords.  This causes the lexer to
248 /// automatically map matching identifiers to specialized token codes.
249 static void AddKeyword(StringRef Keyword,
250                        tok::TokenKind TokenCode, unsigned Flags,
251                        const LangOptions &LangOpts, IdentifierTable &Table) {
252   KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
253 
254   // Don't add this keyword if disabled in this language.
255   if (AddResult == KS_Disabled) return;
256 
257   IdentifierInfo &Info =
258       Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
259   Info.setIsExtensionToken(AddResult == KS_Extension);
260   Info.setIsFutureCompatKeyword(AddResult == KS_Future);
261 }
262 
263 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
264 /// representations.
265 static void AddCXXOperatorKeyword(StringRef Keyword,
266                                   tok::TokenKind TokenCode,
267                                   IdentifierTable &Table) {
268   IdentifierInfo &Info = Table.get(Keyword, TokenCode);
269   Info.setIsCPlusPlusOperatorKeyword();
270 }
271 
272 /// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
273 /// or "property".
274 static void AddObjCKeyword(StringRef Name,
275                            tok::ObjCKeywordKind ObjCID,
276                            IdentifierTable &Table) {
277   Table.get(Name).setObjCKeywordID(ObjCID);
278 }
279 
280 static void AddInterestingIdentifier(StringRef Name,
281                                      tok::InterestingIdentifierKind BTID,
282                                      IdentifierTable &Table) {
283   // Don't add 'not_interesting' identifier.
284   if (BTID != tok::not_interesting) {
285     IdentifierInfo &Info = Table.get(Name, tok::identifier);
286     Info.setInterestingIdentifierID(BTID);
287   }
288 }
289 
290 /// AddKeywords - Add all keywords to the symbol table.
291 ///
292 void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
293   // Add keywords and tokens for the current language.
294 #define KEYWORD(NAME, FLAGS) \
295   AddKeyword(StringRef(#NAME), tok::kw_ ## NAME,  \
296              FLAGS, LangOpts, *this);
297 #define ALIAS(NAME, TOK, FLAGS) \
298   AddKeyword(StringRef(NAME), tok::kw_ ## TOK,  \
299              FLAGS, LangOpts, *this);
300 #define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
301   if (LangOpts.CXXOperatorNames)          \
302     AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);
303 #define OBJC_AT_KEYWORD(NAME)  \
304   if (LangOpts.ObjC)           \
305     AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
306 #define INTERESTING_IDENTIFIER(NAME)                                           \
307   AddInterestingIdentifier(StringRef(#NAME), tok::NAME, *this);
308 
309 #define TESTING_KEYWORD(NAME, FLAGS)
310 #include "clang/Basic/TokenKinds.def"
311 
312   if (LangOpts.ParseUnknownAnytype)
313     AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
314                LangOpts, *this);
315 
316   if (LangOpts.DeclSpecKeyword)
317     AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);
318 
319   if (LangOpts.IEEE128)
320     AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
321 
322   // Add the 'import' contextual keyword.
323   get("import").setModulesImport(true);
324 }
325 
326 /// Checks if the specified token kind represents a keyword in the
327 /// specified language.
328 /// \returns Status of the keyword in the language.
329 static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
330                                       tok::TokenKind K) {
331   switch (K) {
332 #define KEYWORD(NAME, FLAGS) \
333   case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
334 #include "clang/Basic/TokenKinds.def"
335   default: return KS_Disabled;
336   }
337 }
338 
339 /// Returns true if the identifier represents a keyword in the
340 /// specified language.
341 bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
342   switch (getTokenKwStatus(LangOpts, getTokenID())) {
343   case KS_Enabled:
344   case KS_Extension:
345     return true;
346   default:
347     return false;
348   }
349 }
350 
351 /// Returns true if the identifier represents a C++ keyword in the
352 /// specified language.
353 bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
354   if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
355     return false;
356   // This is a C++ keyword if this identifier is not a keyword when checked
357   // using LangOptions without C++ support.
358   LangOptions LangOptsNoCPP = LangOpts;
359   LangOptsNoCPP.CPlusPlus = false;
360   LangOptsNoCPP.CPlusPlus11 = false;
361   LangOptsNoCPP.CPlusPlus20 = false;
362   return !isKeyword(LangOptsNoCPP);
363 }
364 
365 ReservedIdentifierStatus
366 IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
367   StringRef Name = getName();
368 
369   // '_' is a reserved identifier, but its use is so common (e.g. to store
370   // ignored values) that we don't warn on it.
371   if (Name.size() <= 1)
372     return ReservedIdentifierStatus::NotReserved;
373 
374   // [lex.name] p3
375   if (Name[0] == '_') {
376 
377     // Each name that begins with an underscore followed by an uppercase letter
378     // or another underscore is reserved.
379     if (Name[1] == '_')
380       return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
381 
382     if ('A' <= Name[1] && Name[1] <= 'Z')
383       return ReservedIdentifierStatus::
384           StartsWithUnderscoreFollowedByCapitalLetter;
385 
386     // This is a bit misleading: it actually means it's only reserved if we're
387     // at global scope because it starts with an underscore.
388     return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
389   }
390 
391   // Each name that contains a double underscore (__) is reserved.
392   if (LangOpts.CPlusPlus && Name.contains("__"))
393     return ReservedIdentifierStatus::ContainsDoubleUnderscore;
394 
395   return ReservedIdentifierStatus::NotReserved;
396 }
397 
398 ReservedLiteralSuffixIdStatus
399 IdentifierInfo::isReservedLiteralSuffixId() const {
400   StringRef Name = getName();
401 
402   if (Name[0] != '_')
403     return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
404 
405   if (Name.contains("__"))
406     return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
407 
408   return ReservedLiteralSuffixIdStatus::NotReserved;
409 }
410 
411 StringRef IdentifierInfo::deuglifiedName() const {
412   StringRef Name = getName();
413   if (Name.size() >= 2 && Name.front() == '_' &&
414       (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
415     return Name.ltrim('_');
416   return Name;
417 }
418 
419 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
420   // We use a perfect hash function here involving the length of the keyword,
421   // the first and third character.  For preprocessor ID's there are no
422   // collisions (if there were, the switch below would complain about duplicate
423   // case values).  Note that this depends on 'if' being null terminated.
424 
425 #define HASH(LEN, FIRST, THIRD) \
426   (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
427 #define CASE(LEN, FIRST, THIRD, NAME) \
428   case HASH(LEN, FIRST, THIRD): \
429     return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
430 
431   unsigned Len = getLength();
432   if (Len < 2) return tok::pp_not_keyword;
433   const char *Name = getNameStart();
434   switch (HASH(Len, Name[0], Name[2])) {
435   default: return tok::pp_not_keyword;
436   CASE( 2, 'i', '\0', if);
437   CASE( 4, 'e', 'i', elif);
438   CASE( 4, 'e', 's', else);
439   CASE( 4, 'l', 'n', line);
440   CASE( 4, 's', 'c', sccs);
441   CASE( 5, 'e', 'd', endif);
442   CASE( 5, 'e', 'r', error);
443   CASE( 5, 'i', 'e', ident);
444   CASE( 5, 'i', 'd', ifdef);
445   CASE( 5, 'u', 'd', undef);
446 
447   CASE( 6, 'a', 's', assert);
448   CASE( 6, 'd', 'f', define);
449   CASE( 6, 'i', 'n', ifndef);
450   CASE( 6, 'i', 'p', import);
451   CASE( 6, 'p', 'a', pragma);
452 
453   CASE( 7, 'd', 'f', defined);
454   CASE( 7, 'e', 'i', elifdef);
455   CASE( 7, 'i', 'c', include);
456   CASE( 7, 'w', 'r', warning);
457 
458   CASE( 8, 'e', 'i', elifndef);
459   CASE( 8, 'u', 'a', unassert);
460   CASE(12, 'i', 'c', include_next);
461 
462   CASE(14, '_', 'p', __public_macro);
463 
464   CASE(15, '_', 'p', __private_macro);
465 
466   CASE(16, '_', 'i', __include_macros);
467 #undef CASE
468 #undef HASH
469   }
470 }
471 
472 //===----------------------------------------------------------------------===//
473 // Stats Implementation
474 //===----------------------------------------------------------------------===//
475 
476 /// PrintStats - Print statistics about how well the identifier table is doing
477 /// at hashing identifiers.
478 void IdentifierTable::PrintStats() const {
479   unsigned NumBuckets = HashTable.getNumBuckets();
480   unsigned NumIdentifiers = HashTable.getNumItems();
481   unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
482   unsigned AverageIdentifierSize = 0;
483   unsigned MaxIdentifierLength = 0;
484 
485   // TODO: Figure out maximum times an identifier had to probe for -stats.
486   for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
487        I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
488     unsigned IdLen = I->getKeyLength();
489     AverageIdentifierSize += IdLen;
490     if (MaxIdentifierLength < IdLen)
491       MaxIdentifierLength = IdLen;
492   }
493 
494   fprintf(stderr, "\n*** Identifier Table Stats:\n");
495   fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
496   fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
497   fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
498           NumIdentifiers/(double)NumBuckets);
499   fprintf(stderr, "Ave identifier length: %f\n",
500           (AverageIdentifierSize/(double)NumIdentifiers));
501   fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
502 
503   // Compute statistics about the memory allocated for identifiers.
504   HashTable.getAllocator().PrintStats();
505 }
506 
507 //===----------------------------------------------------------------------===//
508 // SelectorTable Implementation
509 //===----------------------------------------------------------------------===//
510 
511 unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
512   return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
513 }
514 
515 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
516   assert(!Names.empty() && "must have >= 1 selector slots");
517   if (getNumArgs() != Names.size())
518     return false;
519   for (unsigned I = 0, E = Names.size(); I != E; ++I) {
520     if (getNameForSlot(I) != Names[I])
521       return false;
522   }
523   return true;
524 }
525 
526 bool Selector::isUnarySelector(StringRef Name) const {
527   return isUnarySelector() && getNameForSlot(0) == Name;
528 }
529 
530 unsigned Selector::getNumArgs() const {
531   unsigned IIF = getIdentifierInfoFlag();
532   if (IIF <= ZeroArg)
533     return 0;
534   if (IIF == OneArg)
535     return 1;
536   // We point to a MultiKeywordSelector.
537   MultiKeywordSelector *SI = getMultiKeywordSelector();
538   return SI->getNumArgs();
539 }
540 
541 IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
542   if (getIdentifierInfoFlag() < MultiArg) {
543     assert(argIndex == 0 && "illegal keyword index");
544     return getAsIdentifierInfo();
545   }
546 
547   // We point to a MultiKeywordSelector.
548   MultiKeywordSelector *SI = getMultiKeywordSelector();
549   return SI->getIdentifierInfoForSlot(argIndex);
550 }
551 
552 StringRef Selector::getNameForSlot(unsigned int argIndex) const {
553   IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
554   return II ? II->getName() : StringRef();
555 }
556 
557 std::string MultiKeywordSelector::getName() const {
558   SmallString<256> Str;
559   llvm::raw_svector_ostream OS(Str);
560   for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
561     if (*I)
562       OS << (*I)->getName();
563     OS << ':';
564   }
565 
566   return std::string(OS.str());
567 }
568 
569 std::string Selector::getAsString() const {
570   if (isNull())
571     return "<null selector>";
572 
573   if (getIdentifierInfoFlag() < MultiArg) {
574     IdentifierInfo *II = getAsIdentifierInfo();
575 
576     if (getNumArgs() == 0) {
577       assert(II && "If the number of arguments is 0 then II is guaranteed to "
578                    "not be null.");
579       return std::string(II->getName());
580     }
581 
582     if (!II)
583       return ":";
584 
585     return II->getName().str() + ":";
586   }
587 
588   // We have a multiple keyword selector.
589   return getMultiKeywordSelector()->getName();
590 }
591 
592 void Selector::print(llvm::raw_ostream &OS) const {
593   OS << getAsString();
594 }
595 
596 LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
597 
598 /// Interpreting the given string using the normal CamelCase
599 /// conventions, determine whether the given string starts with the
600 /// given "word", which is assumed to end in a lowercase letter.
601 static bool startsWithWord(StringRef name, StringRef word) {
602   if (name.size() < word.size()) return false;
603   return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
604           name.startswith(word));
605 }
606 
607 ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
608   IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
609   if (!first) return OMF_None;
610 
611   StringRef name = first->getName();
612   if (sel.isUnarySelector()) {
613     if (name == "autorelease") return OMF_autorelease;
614     if (name == "dealloc") return OMF_dealloc;
615     if (name == "finalize") return OMF_finalize;
616     if (name == "release") return OMF_release;
617     if (name == "retain") return OMF_retain;
618     if (name == "retainCount") return OMF_retainCount;
619     if (name == "self") return OMF_self;
620     if (name == "initialize") return OMF_initialize;
621   }
622 
623   if (name == "performSelector" || name == "performSelectorInBackground" ||
624       name == "performSelectorOnMainThread")
625     return OMF_performSelector;
626 
627   // The other method families may begin with a prefix of underscores.
628   while (!name.empty() && name.front() == '_')
629     name = name.substr(1);
630 
631   if (name.empty()) return OMF_None;
632   switch (name.front()) {
633   case 'a':
634     if (startsWithWord(name, "alloc")) return OMF_alloc;
635     break;
636   case 'c':
637     if (startsWithWord(name, "copy")) return OMF_copy;
638     break;
639   case 'i':
640     if (startsWithWord(name, "init")) return OMF_init;
641     break;
642   case 'm':
643     if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
644     break;
645   case 'n':
646     if (startsWithWord(name, "new")) return OMF_new;
647     break;
648   default:
649     break;
650   }
651 
652   return OMF_None;
653 }
654 
655 ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
656   IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
657   if (!first) return OIT_None;
658 
659   StringRef name = first->getName();
660 
661   if (name.empty()) return OIT_None;
662   switch (name.front()) {
663     case 'a':
664       if (startsWithWord(name, "array")) return OIT_Array;
665       break;
666     case 'd':
667       if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
668       if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
669       break;
670     case 's':
671       if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
672       if (startsWithWord(name, "standard")) return OIT_Singleton;
673       break;
674     case 'i':
675       if (startsWithWord(name, "init")) return OIT_Init;
676       break;
677     default:
678       break;
679   }
680   return OIT_None;
681 }
682 
683 ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
684   IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
685   if (!first) return SFF_None;
686 
687   StringRef name = first->getName();
688 
689   switch (name.front()) {
690     case 'a':
691       if (name == "appendFormat") return SFF_NSString;
692       break;
693 
694     case 'i':
695       if (name == "initWithFormat") return SFF_NSString;
696       break;
697 
698     case 'l':
699       if (name == "localizedStringWithFormat") return SFF_NSString;
700       break;
701 
702     case 's':
703       if (name == "stringByAppendingFormat" ||
704           name == "stringWithFormat") return SFF_NSString;
705       break;
706   }
707   return SFF_None;
708 }
709 
namespace {

/// Private state behind SelectorTable's opaque Impl pointer.
struct SelectorTableImpl {
  /// Uniquing set of multi-keyword selectors; getSelector looks selectors up
  /// here and inserts newly created ones.
  llvm::FoldingSet<MultiKeywordSelector> Table;
  /// Allocator that getSelector uses for the storage of each new
  /// MultiKeywordSelector.
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
718 
719 static SelectorTableImpl &getSelectorTableImpl(void *P) {
720   return *static_cast<SelectorTableImpl*>(P);
721 }
722 
723 SmallString<64>
724 SelectorTable::constructSetterName(StringRef Name) {
725   SmallString<64> SetterName("set");
726   SetterName += Name;
727   SetterName[3] = toUppercase(SetterName[3]);
728   return SetterName;
729 }
730 
731 Selector
732 SelectorTable::constructSetterSelector(IdentifierTable &Idents,
733                                        SelectorTable &SelTable,
734                                        const IdentifierInfo *Name) {
735   IdentifierInfo *SetterName =
736     &Idents.get(constructSetterName(Name->getName()));
737   return SelTable.getUnarySelector(SetterName);
738 }
739 
740 std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
741   StringRef Name = Sel.getNameForSlot(0);
742   assert(Name.startswith("set") && "invalid setter name");
743   return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
744 }
745 
746 size_t SelectorTable::getTotalMemory() const {
747   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
748   return SelTabImpl.Allocator.getTotalMemory();
749 }
750 
751 Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
752   if (nKeys < 2)
753     return Selector(IIV[0], nKeys);
754 
755   SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
756 
757   // Unique selector, to guarantee there is one per name.
758   llvm::FoldingSetNodeID ID;
759   MultiKeywordSelector::Profile(ID, IIV, nKeys);
760 
761   void *InsertPos = nullptr;
762   if (MultiKeywordSelector *SI =
763         SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
764     return Selector(SI);
765 
766   // MultiKeywordSelector objects are not allocated with new because they have a
767   // variable size array (for parameter types) at the end of them.
768   unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
769   MultiKeywordSelector *SI =
770       (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
771           Size, alignof(MultiKeywordSelector));
772   new (SI) MultiKeywordSelector(nKeys, IIV);
773   SelTabImpl.Table.InsertNode(SI, InsertPos);
774   return Selector(SI);
775 }
776 
777 SelectorTable::SelectorTable() {
778   Impl = new SelectorTableImpl();
779 }
780 
781 SelectorTable::~SelectorTable() {
782   delete &getSelectorTableImpl(Impl);
783 }
784 
785 const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
786   switch (Operator) {
787   case OO_None:
788   case NUM_OVERLOADED_OPERATORS:
789     return nullptr;
790 
791 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
792   case OO_##Name: return Spelling;
793 #include "clang/Basic/OperatorKinds.def"
794   }
795 
796   llvm_unreachable("Invalid OverloadedOperatorKind!");
797 }
798 
799 StringRef clang::getNullabilitySpelling(NullabilityKind kind,
800                                         bool isContextSensitive) {
801   switch (kind) {
802   case NullabilityKind::NonNull:
803     return isContextSensitive ? "nonnull" : "_Nonnull";
804 
805   case NullabilityKind::Nullable:
806     return isContextSensitive ? "nullable" : "_Nullable";
807 
808   case NullabilityKind::NullableResult:
809     assert(!isContextSensitive &&
810            "_Nullable_result isn't supported as context-sensitive keyword");
811     return "_Nullable_result";
812 
813   case NullabilityKind::Unspecified:
814     return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
815   }
816   llvm_unreachable("Unknown nullability kind.");
817 }
818 
819 llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
820                                      NullabilityKind NK) {
821   switch (NK) {
822   case NullabilityKind::NonNull:
823     return OS << "NonNull";
824   case NullabilityKind::Nullable:
825     return OS << "Nullable";
826   case NullabilityKind::NullableResult:
827     return OS << "NullableResult";
828   case NullabilityKind::Unspecified:
829     return OS << "Unspecified";
830   }
831   llvm_unreachable("Unknown nullability kind.");
832 }
833 
834 diag::kind
835 IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
836                                          const LangOptions &LangOpts) {
837   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
838 
839   unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
840 #define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
841 #include "clang/Basic/TokenKinds.def"
842 #undef KEYWORD
843       ;
844 
845   if (LangOpts.CPlusPlus) {
846     if ((Flags & KEYCXX11) == KEYCXX11)
847       return diag::warn_cxx11_keyword;
848 
849     // char8_t is not modeled as a CXX20_KEYWORD because it's not
850     // unconditionally enabled in C++20 mode. (It can be disabled
851     // by -fno-char8_t.)
852     if (((Flags & KEYCXX20) == KEYCXX20) ||
853         ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
854       return diag::warn_cxx20_keyword;
855   } else {
856     if ((Flags & KEYC99) == KEYC99)
857       return diag::warn_c99_keyword;
858     if ((Flags & KEYC23) == KEYC23)
859       return diag::warn_c23_keyword;
860   }
861 
862   llvm_unreachable(
863       "Keyword not known to come from a newer Standard or proposed Standard");
864 }
865