xref: /llvm-project/clang/lib/Lex/PPDirectives.cpp (revision 4018317407006b2c632fbb75729de624a2426439)
1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implements # directive processing for the Preprocessor.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/AttributeCommonInfo.h"
15 #include "clang/Basic/Attributes.h"
16 #include "clang/Basic/CharInfo.h"
17 #include "clang/Basic/DirectoryEntry.h"
18 #include "clang/Basic/FileManager.h"
19 #include "clang/Basic/IdentifierTable.h"
20 #include "clang/Basic/LangOptions.h"
21 #include "clang/Basic/Module.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/CodeCompletionHandler.h"
27 #include "clang/Lex/HeaderSearch.h"
28 #include "clang/Lex/LexDiagnostic.h"
29 #include "clang/Lex/LiteralSupport.h"
30 #include "clang/Lex/MacroInfo.h"
31 #include "clang/Lex/ModuleLoader.h"
32 #include "clang/Lex/ModuleMap.h"
33 #include "clang/Lex/PPCallbacks.h"
34 #include "clang/Lex/Pragma.h"
35 #include "clang/Lex/Preprocessor.h"
36 #include "clang/Lex/PreprocessorOptions.h"
37 #include "clang/Lex/Token.h"
38 #include "clang/Lex/VariadicMacroSupport.h"
39 #include "llvm/ADT/ArrayRef.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/ScopeExit.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringExtras.h"
44 #include "llvm/ADT/StringRef.h"
45 #include "llvm/ADT/StringSwitch.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/Path.h"
48 #include "llvm/Support/SaveAndRestore.h"
49 #include <algorithm>
50 #include <cassert>
51 #include <cstring>
52 #include <optional>
53 #include <string>
54 #include <utility>
55 
56 using namespace clang;
57 
58 //===----------------------------------------------------------------------===//
59 // Utility Methods for Preprocessor Directive Handling.
60 //===----------------------------------------------------------------------===//
61 
62 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
63   static_assert(std::is_trivially_destructible_v<MacroInfo>, "");
64   return new (BP) MacroInfo(L);
65 }
66 
67 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
68                                                            SourceLocation Loc) {
69   return new (BP) DefMacroDirective(MI, Loc);
70 }
71 
72 UndefMacroDirective *
73 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
74   return new (BP) UndefMacroDirective(UndefLoc);
75 }
76 
77 VisibilityMacroDirective *
78 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
79                                                bool isPublic) {
80   return new (BP) VisibilityMacroDirective(Loc, isPublic);
81 }
82 
83 /// Read and discard all tokens remaining on the current line until
84 /// the tok::eod token is found.
85 SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
86   SourceRange Res;
87 
88   LexUnexpandedToken(Tmp);
89   Res.setBegin(Tmp.getLocation());
90   while (Tmp.isNot(tok::eod)) {
91     assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
92     LexUnexpandedToken(Tmp);
93   }
94   Res.setEnd(Tmp.getLocation());
95   return Res;
96 }
97 
/// Classification of a macro name appearing in a \#define or \#undef, used to
/// pick which (if any) reserved-name diagnostic applies.
enum MacroDiag {
  /// Not a reserved identifier.
  MD_NoWarn = 0,
  /// Macro hides keyword, enabled by default.
  MD_KeywordDef = 1,
  /// \#define or \#undef of a reserved id, disabled by default.
  MD_ReservedMacro = 2,
  /// The name was classified as a reserved C++ attribute name.
  MD_ReservedAttributeIdentifier = 3
};
105 
/// Selector values streamed into the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics to choose the matching %select
/// alternative. The numeric values are therefore significant.
enum PPElifDiag {
  PED_Elif = 0,
  PED_Elifdef = 1,
  PED_Elifndef = 2
};
113 
114 static bool isFeatureTestMacro(StringRef MacroName) {
115   // list from:
116   // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
117   // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
118   // * man 7 feature_test_macros
119   // The list must be sorted for correct binary search.
120   static constexpr StringRef ReservedMacro[] = {
121       "_ATFILE_SOURCE",
122       "_BSD_SOURCE",
123       "_CRT_NONSTDC_NO_WARNINGS",
124       "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
125       "_CRT_SECURE_NO_WARNINGS",
126       "_FILE_OFFSET_BITS",
127       "_FORTIFY_SOURCE",
128       "_GLIBCXX_ASSERTIONS",
129       "_GLIBCXX_CONCEPT_CHECKS",
130       "_GLIBCXX_DEBUG",
131       "_GLIBCXX_DEBUG_PEDANTIC",
132       "_GLIBCXX_PARALLEL",
133       "_GLIBCXX_PARALLEL_ASSERTIONS",
134       "_GLIBCXX_SANITIZE_VECTOR",
135       "_GLIBCXX_USE_CXX11_ABI",
136       "_GLIBCXX_USE_DEPRECATED",
137       "_GNU_SOURCE",
138       "_ISOC11_SOURCE",
139       "_ISOC95_SOURCE",
140       "_ISOC99_SOURCE",
141       "_LARGEFILE64_SOURCE",
142       "_POSIX_C_SOURCE",
143       "_REENTRANT",
144       "_SVID_SOURCE",
145       "_THREAD_SAFE",
146       "_XOPEN_SOURCE",
147       "_XOPEN_SOURCE_EXTENDED",
148       "__STDCPP_WANT_MATH_SPEC_FUNCS__",
149       "__STDC_FORMAT_MACROS",
150   };
151   return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),
152                             MacroName);
153 }
154 
155 static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
156                                      const MacroInfo *MI,
157                                      const StringRef MacroName) {
158   // If this is a macro with special handling (like __LINE__) then it's language
159   // defined.
160   if (MI->isBuiltinMacro())
161     return true;
162   // Builtin macros are defined in the builtin file
163   if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))
164     return false;
165   // C defines macros starting with __STDC, and C++ defines macros starting with
166   // __STDCPP
167   if (MacroName.starts_with("__STDC"))
168     return true;
169   // C++ defines the __cplusplus macro
170   if (MacroName == "__cplusplus")
171     return true;
172   // C++ defines various feature-test macros starting with __cpp
173   if (MacroName.starts_with("__cpp"))
174     return true;
175   // Anything else isn't language-defined
176   return false;
177 }
178 
179 static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) {
180   const LangOptions &Lang = PP.getLangOpts();
181   if (Lang.CPlusPlus &&
182       hasAttribute(AttributeCommonInfo::AS_CXX11, /* Scope*/ nullptr, II,
183                    PP.getTargetInfo(), Lang, /*CheckPlugins*/ false) > 0) {
184     AttributeCommonInfo::AttrArgsInfo AttrArgsInfo =
185         AttributeCommonInfo::getCXX11AttrArgsInfo(II);
186     if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required)
187       return PP.isNextPPTokenLParen();
188 
189     return !PP.isNextPPTokenLParen() ||
190            AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional;
191   }
192   return false;
193 }
194 
195 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
196   const LangOptions &Lang = PP.getLangOpts();
197   StringRef Text = II->getName();
198   if (isReservedInAllContexts(II->isReserved(Lang)))
199     return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;
200   if (II->isKeyword(Lang))
201     return MD_KeywordDef;
202   if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
203     return MD_KeywordDef;
204   if (isReservedCXXAttributeName(PP, II))
205     return MD_ReservedAttributeIdentifier;
206   return MD_NoWarn;
207 }
208 
209 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
210   const LangOptions &Lang = PP.getLangOpts();
211   // Do not warn on keyword undef.  It is generally harmless and widely used.
212   if (isReservedInAllContexts(II->isReserved(Lang)))
213     return MD_ReservedMacro;
214   if (isReservedCXXAttributeName(PP, II))
215     return MD_ReservedAttributeIdentifier;
216   return MD_NoWarn;
217 }
218 
219 // Return true if we want to issue a diagnostic by default if we
220 // encounter this name in a #include with the wrong case. For now,
221 // this includes the standard C and C++ headers, Posix headers,
222 // and Boost headers. Improper case for these #includes is a
223 // potential portability issue.
224 static bool warnByDefaultOnWrongCase(StringRef Include) {
225   // If the first component of the path is "boost", treat this like a standard header
226   // for the purposes of diagnostics.
227   if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
228     return true;
229 
230   // "condition_variable" is the longest standard header name at 18 characters.
231   // If the include file name is longer than that, it can't be a standard header.
232   static const size_t MaxStdHeaderNameLen = 18u;
233   if (Include.size() > MaxStdHeaderNameLen)
234     return false;
235 
236   // Lowercase and normalize the search string.
237   SmallString<32> LowerInclude{Include};
238   for (char &Ch : LowerInclude) {
239     // In the ASCII range?
240     if (static_cast<unsigned char>(Ch) > 0x7f)
241       return false; // Can't be a standard header
242     // ASCII lowercase:
243     if (Ch >= 'A' && Ch <= 'Z')
244       Ch += 'a' - 'A';
245     // Normalize path separators for comparison purposes.
246     else if (::llvm::sys::path::is_separator(Ch))
247       Ch = '/';
248   }
249 
250   // The standard C/C++ and Posix headers
251   return llvm::StringSwitch<bool>(LowerInclude)
252     // C library headers
253     .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
254     .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
255     .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
256     .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true)
257     .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
258     .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
259     .Cases("wchar.h", "wctype.h", true)
260 
261     // C++ headers for C library facilities
262     .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
263     .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
264     .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
265     .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
266     .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
267     .Case("cwctype", true)
268 
269     // C++ library headers
270     .Cases("algorithm", "fstream", "list", "regex", "thread", true)
271     .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
272     .Cases("atomic", "future", "map", "set", "type_traits", true)
273     .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
274     .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
275     .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
276     .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
277     .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
278     .Cases("deque", "istream", "queue", "string", "valarray", true)
279     .Cases("exception", "iterator", "random", "strstream", "vector", true)
280     .Cases("forward_list", "limits", "ratio", "system_error", true)
281 
282     // POSIX headers (which aren't also C headers)
283     .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
284     .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
285     .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
286     .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
287     .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
288     .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
289     .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
290     .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
291     .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
292     .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
293     .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
294     .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
295     .Default(false);
296 }
297 
298 /// Find a similar string in `Candidates`.
299 ///
300 /// \param LHS a string for a similar string in `Candidates`
301 ///
302 /// \param Candidates the candidates to find a similar string.
303 ///
304 /// \returns a similar string if exists. If no similar string exists,
305 /// returns std::nullopt.
306 static std::optional<StringRef>
307 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
308   // We need to check if `Candidates` has the exact case-insensitive string
309   // because the Levenshtein distance match does not care about it.
310   for (StringRef C : Candidates) {
311     if (LHS.equals_insensitive(C)) {
312       return C;
313     }
314   }
315 
316   // Keep going with the Levenshtein distance match.
317   // If the LHS size is less than 3, use the LHS size minus 1 and if not,
318   // use the LHS size divided by 3.
319   size_t Length = LHS.size();
320   size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
321 
322   std::optional<std::pair<StringRef, size_t>> SimilarStr;
323   for (StringRef C : Candidates) {
324     size_t CurDist = LHS.edit_distance(C, true);
325     if (CurDist <= MaxDist) {
326       if (!SimilarStr) {
327         // The first similar string found.
328         SimilarStr = {C, CurDist};
329       } else if (CurDist < SimilarStr->second) {
330         // More similar string found.
331         SimilarStr = {C, CurDist};
332       }
333     }
334   }
335 
336   if (SimilarStr) {
337     return SimilarStr->first;
338   } else {
339     return std::nullopt;
340   }
341 }
342 
343 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
344                                   bool *ShadowFlag) {
345   // Missing macro name?
346   if (MacroNameTok.is(tok::eod))
347     return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
348 
349   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
350   if (!II)
351     return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
352 
353   if (II->isCPlusPlusOperatorKeyword()) {
354     // C++ 2.5p2: Alternative tokens behave the same as its primary token
355     // except for their spellings.
356     Diag(MacroNameTok, getLangOpts().MicrosoftExt
357                            ? diag::ext_pp_operator_used_as_macro_name
358                            : diag::err_pp_operator_used_as_macro_name)
359         << II << MacroNameTok.getKind();
360     // Allow #defining |and| and friends for Microsoft compatibility or
361     // recovery when legacy C headers are included in C++.
362   }
363 
364   if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
365     // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
366     return Diag(MacroNameTok, diag::err_defined_macro_name);
367   }
368 
369   // If defining/undefining reserved identifier or a keyword, we need to issue
370   // a warning.
371   SourceLocation MacroNameLoc = MacroNameTok.getLocation();
372   if (ShadowFlag)
373     *ShadowFlag = false;
374   if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
375       (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
376     MacroDiag D = MD_NoWarn;
377     if (isDefineUndef == MU_Define) {
378       D = shouldWarnOnMacroDef(*this, II);
379     }
380     else if (isDefineUndef == MU_Undef)
381       D = shouldWarnOnMacroUndef(*this, II);
382     if (D == MD_KeywordDef) {
383       // We do not want to warn on some patterns widely used in configuration
384       // scripts.  This requires analyzing next tokens, so do not issue warnings
385       // now, only inform caller.
386       if (ShadowFlag)
387         *ShadowFlag = true;
388     }
389     if (D == MD_ReservedMacro)
390       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
391     if (D == MD_ReservedAttributeIdentifier)
392       Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_attribute_id)
393           << II->getName();
394   }
395 
396   // Okay, we got a good identifier.
397   return false;
398 }
399 
400 /// Lex and validate a macro name, which occurs after a
401 /// \#define or \#undef.
402 ///
403 /// This sets the token kind to eod and discards the rest of the macro line if
404 /// the macro name is invalid.
405 ///
406 /// \param MacroNameTok Token that is expected to be a macro name.
407 /// \param isDefineUndef Context in which macro is used.
408 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
409 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
410                                  bool *ShadowFlag) {
411   // Read the token, don't allow macro expansion on it.
412   LexUnexpandedToken(MacroNameTok);
413 
414   if (MacroNameTok.is(tok::code_completion)) {
415     if (CodeComplete)
416       CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
417     setCodeCompletionReached();
418     LexUnexpandedToken(MacroNameTok);
419   }
420 
421   if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
422     return;
423 
424   // Invalid macro name, read and discard the rest of the line and set the
425   // token kind to tok::eod if necessary.
426   if (MacroNameTok.isNot(tok::eod)) {
427     MacroNameTok.setKind(tok::eod);
428     DiscardUntilEndOfDirective();
429   }
430 }
431 
432 /// Ensure that the next token is a tok::eod token.
433 ///
434 /// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
435 /// true, then we consider macros that expand to zero tokens as being ok.
436 ///
437 /// Returns the location of the end of the directive.
438 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
439                                                  bool EnableMacros) {
440   Token Tmp;
441   // Lex unexpanded tokens for most directives: macros might expand to zero
442   // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
443   // #line) allow empty macros.
444   if (EnableMacros)
445     Lex(Tmp);
446   else
447     LexUnexpandedToken(Tmp);
448 
449   // There should be no tokens after the directive, but we allow them as an
450   // extension.
451   while (Tmp.is(tok::comment))  // Skip comments in -C mode.
452     LexUnexpandedToken(Tmp);
453 
454   if (Tmp.is(tok::eod))
455     return Tmp.getLocation();
456 
457   // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
458   // or if this is a macro-style preprocessing directive, because it is more
459   // trouble than it is worth to insert /**/ and check that there is no /**/
460   // in the range also.
461   FixItHint Hint;
462   if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
463       !CurTokenLexer)
464     Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
465   Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
466   return DiscardUntilEndOfDirective().getEnd();
467 }
468 
469 void Preprocessor::SuggestTypoedDirective(const Token &Tok,
470                                           StringRef Directive) const {
471   // If this is a `.S` file, treat unknown # directives as non-preprocessor
472   // directives.
473   if (getLangOpts().AsmPreprocessor) return;
474 
475   std::vector<StringRef> Candidates = {
476       "if", "ifdef", "ifndef", "elif", "else", "endif"
477   };
478   if (LangOpts.C23 || LangOpts.CPlusPlus23)
479     Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
480 
481   if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
482     // Directive cannot be coming from macro.
483     assert(Tok.getLocation().isFileID());
484     CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
485         Tok.getLocation(),
486         Tok.getLocation().getLocWithOffset(Directive.size()));
487     StringRef SuggValue = *Sugg;
488 
489     auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
490     Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
491   }
492 }
493 
494 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
495 /// decided that the subsequent tokens are in the \#if'd out portion of the
496 /// file.  Lex the rest of the file, until we see an \#endif.  If
497 /// FoundNonSkipPortion is true, then we have already emitted code for part of
498 /// this \#if directive, so \#else/\#elif blocks should never be entered.
499 /// If ElseOk is true, then \#else directives are ok, if not, then we have
500 /// already seen one so a \#else directive is a duplicate.  When this returns,
501 /// the caller can lex the first valid token.
502 void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
503                                                 SourceLocation IfTokenLoc,
504                                                 bool FoundNonSkipPortion,
505                                                 bool FoundElse,
506                                                 SourceLocation ElseLoc) {
507   // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
508   // not getting called recursively by storing the RecordedSkippedRanges
509   // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
510   // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
511   // invalidated. If this changes and there is a need to call
512   // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
513   // change to do a second lookup in endLexPass function instead of reusing the
514   // lookup pointer.
515   assert(!SkippingExcludedConditionalBlock &&
516          "calling SkipExcludedConditionalBlock recursively");
517   llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
518 
519   ++NumSkipped;
520   assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
521   assert(CurPPLexer && "Conditional PP block must be in a file!");
522   assert(CurLexer && "Conditional PP block but no current lexer set!");
523 
524   if (PreambleConditionalStack.reachedEOFWhileSkipping())
525     PreambleConditionalStack.clearSkipInfo();
526   else
527     CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
528                                      FoundNonSkipPortion, FoundElse);
529 
530   // Enter raw mode to disable identifier lookup (and thus macro expansion),
531   // disabling warnings, etc.
532   CurPPLexer->LexingRawMode = true;
533   Token Tok;
534   SourceLocation endLoc;
535 
536   /// Keeps track and caches skipped ranges and also retrieves a prior skipped
537   /// range if the same block is re-visited.
538   struct SkippingRangeStateTy {
539     Preprocessor &PP;
540 
541     const char *BeginPtr = nullptr;
542     unsigned *SkipRangePtr = nullptr;
543 
544     SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
545 
546     void beginLexPass() {
547       if (BeginPtr)
548         return; // continue skipping a block.
549 
550       // Initiate a skipping block and adjust the lexer if we already skipped it
551       // before.
552       BeginPtr = PP.CurLexer->getBufferLocation();
553       SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
554       if (*SkipRangePtr) {
555         PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
556                           /*IsAtStartOfLine*/ true);
557       }
558     }
559 
560     void endLexPass(const char *Hashptr) {
561       if (!BeginPtr) {
562         // Not doing normal lexing.
563         assert(PP.CurLexer->isDependencyDirectivesLexer());
564         return;
565       }
566 
567       // Finished skipping a block, record the range if it's first time visited.
568       if (!*SkipRangePtr) {
569         *SkipRangePtr = Hashptr - BeginPtr;
570       }
571       assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
572       BeginPtr = nullptr;
573       SkipRangePtr = nullptr;
574     }
575   } SkippingRangeState(*this);
576 
577   while (true) {
578     if (CurLexer->isDependencyDirectivesLexer()) {
579       CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
580     } else {
581       SkippingRangeState.beginLexPass();
582       while (true) {
583         CurLexer->Lex(Tok);
584 
585         if (Tok.is(tok::code_completion)) {
586           setCodeCompletionReached();
587           if (CodeComplete)
588             CodeComplete->CodeCompleteInConditionalExclusion();
589           continue;
590         }
591 
592         // If this is the end of the buffer, we have an error.
593         if (Tok.is(tok::eof)) {
594           // We don't emit errors for unterminated conditionals here,
595           // Lexer::LexEndOfFile can do that properly.
596           // Just return and let the caller lex after this #include.
597           if (PreambleConditionalStack.isRecording())
598             PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
599                                                       FoundNonSkipPortion,
600                                                       FoundElse, ElseLoc);
601           break;
602         }
603 
604         // If this token is not a preprocessor directive, just skip it.
605         if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
606           continue;
607 
608         break;
609       }
610     }
611     if (Tok.is(tok::eof))
612       break;
613 
614     // We just parsed a # character at the start of a line, so we're in
615     // directive mode.  Tell the lexer this so any newlines we see will be
616     // converted into an EOD token (this terminates the macro).
617     CurPPLexer->ParsingPreprocessorDirective = true;
618     if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
619 
620     assert(Tok.is(tok::hash));
621     const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
622     assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
623 
624     // Read the next token, the directive flavor.
625     LexUnexpandedToken(Tok);
626 
627     // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
628     // something bogus), skip it.
629     if (Tok.isNot(tok::raw_identifier)) {
630       CurPPLexer->ParsingPreprocessorDirective = false;
631       // Restore comment saving mode.
632       if (CurLexer) CurLexer->resetExtendedTokenMode();
633       continue;
634     }
635 
636     // If the first letter isn't i or e, it isn't intesting to us.  We know that
637     // this is safe in the face of spelling differences, because there is no way
638     // to spell an i/e in a strange way that is another letter.  Skipping this
639     // allows us to avoid looking up the identifier info for #define/#undef and
640     // other common directives.
641     StringRef RI = Tok.getRawIdentifier();
642 
643     char FirstChar = RI[0];
644     if (FirstChar >= 'a' && FirstChar <= 'z' &&
645         FirstChar != 'i' && FirstChar != 'e') {
646       CurPPLexer->ParsingPreprocessorDirective = false;
647       // Restore comment saving mode.
648       if (CurLexer) CurLexer->resetExtendedTokenMode();
649       continue;
650     }
651 
652     // Get the identifier name without trigraphs or embedded newlines.  Note
653     // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
654     // when skipping.
655     char DirectiveBuf[20];
656     StringRef Directive;
657     if (!Tok.needsCleaning() && RI.size() < 20) {
658       Directive = RI;
659     } else {
660       std::string DirectiveStr = getSpelling(Tok);
661       size_t IdLen = DirectiveStr.size();
662       if (IdLen >= 20) {
663         CurPPLexer->ParsingPreprocessorDirective = false;
664         // Restore comment saving mode.
665         if (CurLexer) CurLexer->resetExtendedTokenMode();
666         continue;
667       }
668       memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
669       Directive = StringRef(DirectiveBuf, IdLen);
670     }
671 
672     if (Directive.starts_with("if")) {
673       StringRef Sub = Directive.substr(2);
674       if (Sub.empty() ||   // "if"
675           Sub == "def" ||   // "ifdef"
676           Sub == "ndef") {  // "ifndef"
677         // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
678         // bother parsing the condition.
679         DiscardUntilEndOfDirective();
680         CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
681                                        /*foundnonskip*/false,
682                                        /*foundelse*/false);
683       } else {
684         SuggestTypoedDirective(Tok, Directive);
685       }
686     } else if (Directive[0] == 'e') {
687       StringRef Sub = Directive.substr(1);
688       if (Sub == "ndif") {  // "endif"
689         PPConditionalInfo CondInfo;
690         CondInfo.WasSkipping = true; // Silence bogus warning.
691         bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
692         (void)InCond;  // Silence warning in no-asserts mode.
693         assert(!InCond && "Can't be skipping if not in a conditional!");
694 
695         // If we popped the outermost skipping block, we're done skipping!
696         if (!CondInfo.WasSkipping) {
697           SkippingRangeState.endLexPass(Hashptr);
698           // Restore the value of LexingRawMode so that trailing comments
699           // are handled correctly, if we've reached the outermost block.
700           CurPPLexer->LexingRawMode = false;
701           endLoc = CheckEndOfDirective("endif");
702           CurPPLexer->LexingRawMode = true;
703           if (Callbacks)
704             Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
705           break;
706         } else {
707           DiscardUntilEndOfDirective();
708         }
709       } else if (Sub == "lse") { // "else".
710         // #else directive in a skipping conditional.  If not in some other
711         // skipping conditional, and if #else hasn't already been seen, enter it
712         // as a non-skipping conditional.
713         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
714 
715         if (!CondInfo.WasSkipping)
716           SkippingRangeState.endLexPass(Hashptr);
717 
718         // If this is a #else with a #else before it, report the error.
719         if (CondInfo.FoundElse)
720           Diag(Tok, diag::pp_err_else_after_else);
721 
722         // Note that we've seen a #else in this conditional.
723         CondInfo.FoundElse = true;
724 
725         // If the conditional is at the top level, and the #if block wasn't
726         // entered, enter the #else block now.
727         if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
728           CondInfo.FoundNonSkip = true;
729           // Restore the value of LexingRawMode so that trailing comments
730           // are handled correctly.
731           CurPPLexer->LexingRawMode = false;
732           endLoc = CheckEndOfDirective("else");
733           CurPPLexer->LexingRawMode = true;
734           if (Callbacks)
735             Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
736           break;
737         } else {
738           DiscardUntilEndOfDirective();  // C99 6.10p4.
739         }
740       } else if (Sub == "lif") {  // "elif".
741         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
742 
743         if (!CondInfo.WasSkipping)
744           SkippingRangeState.endLexPass(Hashptr);
745 
746         // If this is a #elif with a #else before it, report the error.
747         if (CondInfo.FoundElse)
748           Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
749 
750         // If this is in a skipping block or if we're already handled this #if
751         // block, don't bother parsing the condition.
752         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
753           // FIXME: We should probably do at least some minimal parsing of the
754           // condition to verify that it is well-formed. The current state
755           // allows #elif* directives with completely malformed (or missing)
756           // conditions.
757           DiscardUntilEndOfDirective();
758         } else {
759           // Restore the value of LexingRawMode so that identifiers are
760           // looked up, etc, inside the #elif expression.
761           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
762           CurPPLexer->LexingRawMode = false;
763           IdentifierInfo *IfNDefMacro = nullptr;
764           DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
765           // Stop if Lexer became invalid after hitting code completion token.
766           if (!CurPPLexer)
767             return;
768           const bool CondValue = DER.Conditional;
769           CurPPLexer->LexingRawMode = true;
770           if (Callbacks) {
771             Callbacks->Elif(
772                 Tok.getLocation(), DER.ExprRange,
773                 (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
774                 CondInfo.IfLoc);
775           }
776           // If this condition is true, enter it!
777           if (CondValue) {
778             CondInfo.FoundNonSkip = true;
779             break;
780           }
781         }
782       } else if (Sub == "lifdef" ||  // "elifdef"
783                  Sub == "lifndef") { // "elifndef"
784         bool IsElifDef = Sub == "lifdef";
785         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
786         Token DirectiveToken = Tok;
787 
788         if (!CondInfo.WasSkipping)
789           SkippingRangeState.endLexPass(Hashptr);
790 
791         // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
792         // if this branch is in a skipping block.
793         unsigned DiagID;
794         if (LangOpts.CPlusPlus)
795           DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
796                                         : diag::ext_cxx23_pp_directive;
797         else
798           DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
799                                 : diag::ext_c23_pp_directive;
800         Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
801 
802         // If this is a #elif with a #else before it, report the error.
803         if (CondInfo.FoundElse)
804           Diag(Tok, diag::pp_err_elif_after_else)
805               << (IsElifDef ? PED_Elifdef : PED_Elifndef);
806 
807         // If this is in a skipping block or if we're already handled this #if
808         // block, don't bother parsing the condition.
809         if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
810           // FIXME: We should probably do at least some minimal parsing of the
811           // condition to verify that it is well-formed. The current state
812           // allows #elif* directives with completely malformed (or missing)
813           // conditions.
814           DiscardUntilEndOfDirective();
815         } else {
816           // Restore the value of LexingRawMode so that identifiers are
817           // looked up, etc, inside the #elif[n]def expression.
818           assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
819           CurPPLexer->LexingRawMode = false;
820           Token MacroNameTok;
821           ReadMacroName(MacroNameTok);
822           CurPPLexer->LexingRawMode = true;
823 
824           // If the macro name token is tok::eod, there was an error that was
825           // already reported.
826           if (MacroNameTok.is(tok::eod)) {
827             // Skip code until we get to #endif.  This helps with recovery by
828             // not emitting an error when the #endif is reached.
829             continue;
830           }
831 
832           emitMacroExpansionWarnings(MacroNameTok);
833 
834           CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
835 
836           IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
837           auto MD = getMacroDefinition(MII);
838           MacroInfo *MI = MD.getMacroInfo();
839 
840           if (Callbacks) {
841             if (IsElifDef) {
842               Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
843                                  MD);
844             } else {
845               Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
846                                   MD);
847             }
848           }
849           // If this condition is true, enter it!
850           if (static_cast<bool>(MI) == IsElifDef) {
851             CondInfo.FoundNonSkip = true;
852             break;
853           }
854         }
855       } else {
856         SuggestTypoedDirective(Tok, Directive);
857       }
858     } else {
859       SuggestTypoedDirective(Tok, Directive);
860     }
861 
862     CurPPLexer->ParsingPreprocessorDirective = false;
863     // Restore comment saving mode.
864     if (CurLexer) CurLexer->resetExtendedTokenMode();
865   }
866 
867   // Finally, if we are out of the conditional (saw an #endif or ran off the end
868   // of the file, just stop skipping and return to lexing whatever came after
869   // the #if block.
870   CurPPLexer->LexingRawMode = false;
871 
872   // The last skipped range isn't actually skipped yet if it's truncated
873   // by the end of the preamble; we'll resume parsing after the preamble.
874   if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
875     Callbacks->SourceRangeSkipped(
876         SourceRange(HashTokenLoc, endLoc.isValid()
877                                       ? endLoc
878                                       : CurPPLexer->getSourceLocation()),
879         Tok.getLocation());
880 }
881 
882 Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
883                                            bool AllowTextual) {
884   if (!SourceMgr.isInMainFile(Loc)) {
885     // Try to determine the module of the include directive.
886     // FIXME: Look into directly passing the FileEntry from LookupFile instead.
887     FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
888     if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
889       // The include comes from an included file.
890       return HeaderInfo.getModuleMap()
891           .findModuleForHeader(*EntryOfIncl, AllowTextual)
892           .getModule();
893     }
894   }
895 
896   // This is either in the main file or not in a file at all. It belongs
897   // to the current module, if there is one.
898   return getLangOpts().CurrentModule.empty()
899              ? nullptr
900              : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
901 }
902 
903 OptionalFileEntryRef
904 Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
905                                                SourceLocation Loc) {
906   Module *IncM = getModuleForLocation(
907       IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
908 
909   // Walk up through the include stack, looking through textual headers of M
910   // until we hit a non-textual header that we can #include. (We assume textual
911   // headers of a module with non-textual headers aren't meant to be used to
912   // import entities from the module.)
913   auto &SM = getSourceManager();
914   while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
915     auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
916     auto FE = SM.getFileEntryRefForID(ID);
917     if (!FE)
918       break;
919 
920     // We want to find all possible modules that might contain this header, so
921     // search all enclosing directories for module maps and load them.
922     HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
923                             SourceMgr.isInSystemHeader(Loc));
924 
925     bool InPrivateHeader = false;
926     for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
927       if (!Header.isAccessibleFrom(IncM)) {
928         // It's in a private header; we can't #include it.
929         // FIXME: If there's a public header in some module that re-exports it,
930         // then we could suggest including that, but it's not clear that's the
931         // expected way to make this entity visible.
932         InPrivateHeader = true;
933         continue;
934       }
935 
936       // Don't suggest explicitly excluded headers.
937       if (Header.getRole() == ModuleMap::ExcludedHeader)
938         continue;
939 
940       // We'll suggest including textual headers below if they're
941       // include-guarded.
942       if (Header.getRole() & ModuleMap::TextualHeader)
943         continue;
944 
945       // If we have a module import syntax, we shouldn't include a header to
946       // make a particular module visible. Let the caller know they should
947       // suggest an import instead.
948       if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)
949         return std::nullopt;
950 
951       // If this is an accessible, non-textual header of M's top-level module
952       // that transitively includes the given location and makes the
953       // corresponding module visible, this is the thing to #include.
954       return *FE;
955     }
956 
957     // FIXME: If we're bailing out due to a private header, we shouldn't suggest
958     // an import either.
959     if (InPrivateHeader)
960       return std::nullopt;
961 
962     // If the header is includable and has an include guard, assume the
963     // intended way to expose its contents is by #include, not by importing a
964     // module that transitively includes it.
965     if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE))
966       return *FE;
967 
968     Loc = SM.getIncludeLoc(ID);
969   }
970 
971   return std::nullopt;
972 }
973 
974 OptionalFileEntryRef Preprocessor::LookupFile(
975     SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
976     ConstSearchDirIterator FromDir, const FileEntry *FromFile,
977     ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
978     SmallVectorImpl<char> *RelativePath,
979     ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
980     bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
981   ConstSearchDirIterator CurDirLocal = nullptr;
982   ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal;
983 
984   Module *RequestingModule = getModuleForLocation(
985       FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
986 
987   // If the header lookup mechanism may be relative to the current inclusion
988   // stack, record the parent #includes.
989   SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
990   bool BuildSystemModule = false;
991   if (!FromDir && !FromFile) {
992     FileID FID = getCurrentFileLexer()->getFileID();
993     OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);
994 
995     // If there is no file entry associated with this file, it must be the
996     // predefines buffer or the module includes buffer. Any other file is not
997     // lexed with a normal lexer, so it won't be scanned for preprocessor
998     // directives.
999     //
1000     // If we have the predefines buffer, resolve #include references (which come
1001     // from the -include command line argument) from the current working
1002     // directory instead of relative to the main file.
1003     //
1004     // If we have the module includes buffer, resolve #include references (which
1005     // come from header declarations in the module map) relative to the module
1006     // map file.
1007     if (!FileEnt) {
1008       if (FID == SourceMgr.getMainFileID() && MainFileDir) {
1009         auto IncludeDir =
1010             HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
1011                 Filename, getCurrentModule())
1012                 ? HeaderInfo.getModuleMap().getBuiltinDir()
1013                 : MainFileDir;
1014         Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));
1015         BuildSystemModule = getCurrentModule()->IsSystem;
1016       } else if ((FileEnt = SourceMgr.getFileEntryRefForID(
1017                       SourceMgr.getMainFileID()))) {
1018         auto CWD = FileMgr.getOptionalDirectoryRef(".");
1019         Includers.push_back(std::make_pair(*FileEnt, *CWD));
1020       }
1021     } else {
1022       Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1023     }
1024 
1025     // MSVC searches the current include stack from top to bottom for
1026     // headers included by quoted include directives.
1027     // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
1028     if (LangOpts.MSVCCompat && !isAngled) {
1029       for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1030         if (IsFileLexer(ISEntry))
1031           if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))
1032             Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
1033       }
1034     }
1035   }
1036 
1037   CurDir = CurDirLookup;
1038 
1039   if (FromFile) {
1040     // We're supposed to start looking from after a particular file. Search
1041     // the include path until we find that file or run out of files.
1042     ConstSearchDirIterator TmpCurDir = CurDir;
1043     ConstSearchDirIterator TmpFromDir = nullptr;
1044     while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1045                Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,
1046                Includers, SearchPath, RelativePath, RequestingModule,
1047                SuggestedModule, /*IsMapped=*/nullptr,
1048                /*IsFrameworkFound=*/nullptr, SkipCache)) {
1049       // Keep looking as if this file did a #include_next.
1050       TmpFromDir = TmpCurDir;
1051       ++TmpFromDir;
1052       if (&FE->getFileEntry() == FromFile) {
1053         // Found it.
1054         FromDir = TmpFromDir;
1055         CurDir = TmpCurDir;
1056         break;
1057       }
1058     }
1059   }
1060 
1061   // Do a standard file entry lookup.
1062   OptionalFileEntryRef FE = HeaderInfo.LookupFile(
1063       Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,
1064       RelativePath, RequestingModule, SuggestedModule, IsMapped,
1065       IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);
1066   if (FE)
1067     return FE;
1068 
1069   OptionalFileEntryRef CurFileEnt;
1070   // Otherwise, see if this is a subframework header.  If so, this is relative
1071   // to one of the headers on the #include stack.  Walk the list of the current
1072   // headers on the #include stack and pass them to HeaderInfo.
1073   if (IsFileLexer()) {
1074     if ((CurFileEnt = CurPPLexer->getFileEntry())) {
1075       if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1076               Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule,
1077               SuggestedModule)) {
1078         return FE;
1079       }
1080     }
1081   }
1082 
1083   for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
1084     if (IsFileLexer(ISEntry)) {
1085       if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {
1086         if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(
1087                 Filename, *CurFileEnt, SearchPath, RelativePath,
1088                 RequestingModule, SuggestedModule)) {
1089           return FE;
1090         }
1091       }
1092     }
1093   }
1094 
1095   // Otherwise, we really couldn't find the file.
1096   return std::nullopt;
1097 }
1098 
1099 OptionalFileEntryRef
1100 Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
1101                               const FileEntry *LookupFromFile) {
1102   FileManager &FM = this->getFileManager();
1103   if (llvm::sys::path::is_absolute(Filename)) {
1104     // lookup path or immediately fail
1105     llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1106         Filename, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1107     return llvm::expectedToOptional(std::move(ShouldBeEntry));
1108   }
1109 
1110   auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
1111                                StringRef StartingFrom, StringRef FileName,
1112                                bool RemoveInitialFileComponentFromLookupPath) {
1113     llvm::sys::path::native(StartingFrom, LookupPath);
1114     if (RemoveInitialFileComponentFromLookupPath)
1115       llvm::sys::path::remove_filename(LookupPath);
1116     if (!LookupPath.empty() &&
1117         !llvm::sys::path::is_separator(LookupPath.back())) {
1118       LookupPath.push_back(llvm::sys::path::get_separator().front());
1119     }
1120     LookupPath.append(FileName.begin(), FileName.end());
1121   };
1122 
1123   // Otherwise, it's search time!
1124   SmallString<512> LookupPath;
1125   // Non-angled lookup
1126   if (!isAngled) {
1127     if (LookupFromFile) {
1128       // Use file-based lookup.
1129       StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
1130       if (!FullFileDir.empty()) {
1131         SeparateComponents(LookupPath, FullFileDir, Filename, true);
1132         llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1133             LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1134         if (ShouldBeEntry)
1135           return llvm::expectedToOptional(std::move(ShouldBeEntry));
1136         llvm::consumeError(ShouldBeEntry.takeError());
1137       }
1138     }
1139 
1140     // Otherwise, do working directory lookup.
1141     LookupPath.clear();
1142     auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
1143     if (MaybeWorkingDirEntry) {
1144       DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
1145       StringRef WorkingDir = WorkingDirEntry.getName();
1146       if (!WorkingDir.empty()) {
1147         SeparateComponents(LookupPath, WorkingDir, Filename, false);
1148         llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1149             LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1150         if (ShouldBeEntry)
1151           return llvm::expectedToOptional(std::move(ShouldBeEntry));
1152         llvm::consumeError(ShouldBeEntry.takeError());
1153       }
1154     }
1155   }
1156 
1157   for (const auto &Entry : PPOpts->EmbedEntries) {
1158     LookupPath.clear();
1159     SeparateComponents(LookupPath, Entry, Filename, false);
1160     llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef(
1161         LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false);
1162     if (ShouldBeEntry)
1163       return llvm::expectedToOptional(std::move(ShouldBeEntry));
1164     llvm::consumeError(ShouldBeEntry.takeError());
1165   }
1166   return std::nullopt;
1167 }
1168 
1169 //===----------------------------------------------------------------------===//
1170 // Preprocessor Directive Handling.
1171 //===----------------------------------------------------------------------===//
1172 
1173 class Preprocessor::ResetMacroExpansionHelper {
1174 public:
1175   ResetMacroExpansionHelper(Preprocessor *pp)
1176     : PP(pp), save(pp->DisableMacroExpansion) {
1177     if (pp->MacroExpansionInDirectivesOverride)
1178       pp->DisableMacroExpansion = false;
1179   }
1180 
1181   ~ResetMacroExpansionHelper() {
1182     PP->DisableMacroExpansion = save;
1183   }
1184 
1185 private:
1186   Preprocessor *PP;
1187   bool save;
1188 };
1189 
1190 /// Process a directive while looking for the through header or a #pragma
1191 /// hdrstop. The following directives are handled:
1192 /// #include (to check if it is the through header)
1193 /// #define (to warn about macros that don't match the PCH)
1194 /// #pragma (to check for pragma hdrstop).
1195 /// All other directives are completely discarded.
1196 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1197                                                        SourceLocation HashLoc) {
1198   if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1199     if (II->getPPKeywordID() == tok::pp_define) {
1200       return HandleDefineDirective(Result,
1201                                    /*ImmediatelyAfterHeaderGuard=*/false);
1202     }
1203     if (SkippingUntilPCHThroughHeader &&
1204         II->getPPKeywordID() == tok::pp_include) {
1205       return HandleIncludeDirective(HashLoc, Result);
1206     }
1207     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1208       Lex(Result);
1209       auto *II = Result.getIdentifierInfo();
1210       if (II && II->getName() == "hdrstop")
1211         return HandlePragmaHdrstop(Result);
1212     }
1213   }
1214   DiscardUntilEndOfDirective();
1215 }
1216 
1217 /// HandleDirective - This callback is invoked when the lexer sees a # token
1218 /// at the start of a line.  This consumes the directive, modifies the
1219 /// lexer/preprocessor state, and advances the lexer(s) so that the next token
1220 /// read is the correct one.
///
/// \param Result On entry, the '#' token that introduced the directive. It is
/// saved and then overwritten with the directive-name token, which is what
/// the individual Handle*Directive routines receive.
1221 void Preprocessor::HandleDirective(Token &Result) {
1222   // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1223
1224   // We just parsed a # character at the start of a line, so we're in directive
1225   // mode.  Tell the lexer this so any newlines we see will be converted into an
1226   // EOD token (which terminates the directive).
1227   CurPPLexer->ParsingPreprocessorDirective = true;
1228   if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1229
  // Read the flag before clearing it; the saved value is forwarded to
  // HandleDefineDirective below.
1230   bool ImmediatelyAfterTopLevelIfndef =
1231       CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1232   CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1233
1234   ++NumDirectives;
1235
1236   // We are about to read a token.  For the multiple-include optimization FA to
1237   // work, we have to remember if we had read any tokens *before* this
1238   // pp-directive.
1239   bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1240
1241   // Save the '#' token in case we need to return it later.
1242   Token SavedHash = Result;
1243
1244   // Read the next token, the directive flavor.  This isn't expanded due to
1245   // C99 6.10.3p8.
1246   LexUnexpandedToken(Result);
1247
1248   // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
1249   //   #define A(x) #x
1250   //   A(abc
1251   //     #warning blah
1252   //   def)
1253   // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1254   // not support this for #include-like directives, since that can result in
1255   // terrible diagnostics, and does not work in GCC.
1256   if (InMacroArgs) {
1257     if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1258       switch (II->getPPKeywordID()) {
1259       case tok::pp_include:
1260       case tok::pp_import:
1261       case tok::pp_include_next:
1262       case tok::pp___include_macros:
1263       case tok::pp_pragma:
1264       case tok::pp_embed:
        // These directives are rejected outright inside macro arguments: emit
        // an error plus a note at the macro, then drop the rest of the line.
1265         Diag(Result, diag::err_embedded_directive) << II->getName();
1266         Diag(*ArgMacro, diag::note_macro_expansion_here)
1267             << ArgMacro->getIdentifierInfo();
1268         DiscardUntilEndOfDirective();
1269         return;
1270       default:
1271         break;
1272       }
1273     }
    // Any other directive inside macro arguments is only diagnosed and then
    // processed normally below.
1274     Diag(Result, diag::ext_embedded_directive);
1275   }
1276
1277   // Temporarily enable macro expansion if set so
1278   // and reset to previous state when returning from this function.
1279   ResetMacroExpansionHelper helper(this);
1280
  // While skipping ahead for a PCH through header or a #pragma hdrstop, only
  // the restricted handler runs; the normal dispatch below is bypassed.
1281   if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1282     return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1283
  // Dispatch on the kind of the token following '#'. Cases that `break` out
  // of this switch fall through to the unknown-directive handling at the end
  // of the function.
1284   switch (Result.getKind()) {
1285   case tok::eod:
1286     // Ignore the null directive with regards to the multiple-include
1287     // optimization, i.e. allow the null directive to appear outside of the
1288     // include guard and still enable the multiple-include optimization.
1289     CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1290     return;   // null directive.
1291   case tok::code_completion:
1292     setCodeCompletionReached();
1293     if (CodeComplete)
1294       CodeComplete->CodeCompleteDirective(
1295                                     CurPPLexer->getConditionalStackDepth() > 0);
1296     return;
1297   case tok::numeric_constant:  // # 7  GNU line marker directive.
1298     // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1299     // directive. However do permit it in the predefines file, as we use line
1300     // markers to mark the builtin macros as being in a system header.
1301     if (getLangOpts().AsmPreprocessor &&
1302         SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())
1303       break;
1304     return HandleDigitDirective(Result);
1305   default:
1306     IdentifierInfo *II = Result.getIdentifierInfo();
1307     if (!II) break; // Not an identifier.
1308
1309     // Ask what the preprocessor keyword ID is.
1310     switch (II->getPPKeywordID()) {
1311     default: break;
1312     // C99 6.10.1 - Conditional Inclusion.
1313     case tok::pp_if:
1314       return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1315     case tok::pp_ifdef:
1316       return HandleIfdefDirective(Result, SavedHash, false,
1317                                   true /*not valid for miopt*/);
1318     case tok::pp_ifndef:
1319       return HandleIfdefDirective(Result, SavedHash, true,
1320                                   ReadAnyTokensBeforeDirective);
1321     case tok::pp_elif:
1322     case tok::pp_elifdef:
1323     case tok::pp_elifndef:
1324       return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1325
1326     case tok::pp_else:
1327       return HandleElseDirective(Result, SavedHash);
1328     case tok::pp_endif:
1329       return HandleEndifDirective(Result);
1330
1331     // C99 6.10.2 - Source File Inclusion.
1332     case tok::pp_include:
1333       // Handle #include.
1334       return HandleIncludeDirective(SavedHash.getLocation(), Result);
1335     case tok::pp___include_macros:
1336       // Handle -imacros.
1337       return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1338
1339     // C99 6.10.3 - Macro Replacement.
1340     case tok::pp_define:
1341       return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1342     case tok::pp_undef:
1343       return HandleUndefDirective();
1344
1345     // C99 6.10.4 - Line Control.
1346     case tok::pp_line:
1347       return HandleLineDirective();
1348
1349     // C99 6.10.5 - Error Directive.
1350     case tok::pp_error:
1351       return HandleUserDiagnosticDirective(Result, false);
1352
1353     // C99 6.10.6 - Pragma Directive.
1354     case tok::pp_pragma:
1355       return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1356
1357     // GNU Extensions.
1358     case tok::pp_import:
1359       return HandleImportDirective(SavedHash.getLocation(), Result);
1360     case tok::pp_include_next:
1361       return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1362
1363     case tok::pp_warning:
      // Diagnose #warning according to the language mode (C++23 / C23 pick
      // the compat warning, older modes the extension diagnostic) before
      // emitting the user's message.
1364       if (LangOpts.CPlusPlus)
1365         Diag(Result, LangOpts.CPlusPlus23
1366                          ? diag::warn_cxx23_compat_warning_directive
1367                          : diag::ext_pp_warning_directive)
1368             << /*C++23*/ 1;
1369       else
1370         Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1371                                   : diag::ext_pp_warning_directive)
1372             << /*C23*/ 0;
1373
1374       return HandleUserDiagnosticDirective(Result, true);
1375     case tok::pp_ident:
1376       return HandleIdentSCCSDirective(Result);
1377     case tok::pp_sccs:
1378       return HandleIdentSCCSDirective(Result);
1379     case tok::pp_embed:
      // NOTE(review): getFileEntry() is dereferenced unconditionally when a
      // file lexer exists; this presumably relies on that lexer always having
      // a file entry -- confirm.
1380       return HandleEmbedDirective(SavedHash.getLocation(), Result,
1381                                   getCurrentFileLexer()
1382                                       ? *getCurrentFileLexer()->getFileEntry()
1383                                       : static_cast<FileEntry *>(nullptr));
1384     case tok::pp_assert:
1385       //isExtension = true;  // FIXME: implement #assert
1386       break;
1387     case tok::pp_unassert:
1388       //isExtension = true;  // FIXME: implement #unassert
1389       break;
1390
    // The macro visibility directives are only honored when modules (or
    // local module visibility) are enabled; otherwise they are treated as
    // unknown directives.
1391     case tok::pp___public_macro:
1392       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1393         return HandleMacroPublicDirective(Result);
1394       break;
1395
1396     case tok::pp___private_macro:
1397       if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1398         return HandleMacroPrivateDirective();
1399       break;
1400     }
1401     break;
1402   }
1403
1404   // If this is a .S file, treat unknown # directives as non-preprocessor
1405   // directives.  This is important because # may be a comment or introduce
1406   // various pseudo-ops.  Just return the # token and push back the following
1407   // token to be lexed next time.
1408   if (getLangOpts().AsmPreprocessor) {
1409     auto Toks = std::make_unique<Token[]>(2);
1410     // Return the # and the token after it.
1411     Toks[0] = SavedHash;
1412     Toks[1] = Result;
1413
1414     // If the second token is a hashhash token, then we need to translate it to
1415     // unknown so the token lexer doesn't try to perform token pasting.
1416     if (Result.is(tok::hashhash))
1417       Toks[1].setKind(tok::unknown);
1418
1419     // Enter this token stream so that we re-lex the tokens.  Make sure to
1420     // enable macro expansion, in case the token after the # is an identifier
1421     // that is expanded.
1422     EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1423     return;
1424   }
1425
1426   // If we reached here, the preprocessing token is not valid!
1427   // Start suggesting if a similar directive found.
1428   Diag(Result, diag::err_pp_invalid_directive) << 0;
1429
1430   // Read the rest of the PP line.
1431   DiscardUntilEndOfDirective();
1432
1433   // Okay, we're done parsing the directive.
1434 }
1435 
1436 /// GetLineValue - Convert a numeric token into an unsigned value, emitting
1437 /// Diagnostic DiagID if it is invalid, and returning the value in Val.
1438 static bool GetLineValue(Token &DigitTok, unsigned &Val,
1439                          unsigned DiagID, Preprocessor &PP,
1440                          bool IsGNULineDirective=false) {
1441   if (DigitTok.isNot(tok::numeric_constant)) {
1442     PP.Diag(DigitTok, DiagID);
1443 
1444     if (DigitTok.isNot(tok::eod))
1445       PP.DiscardUntilEndOfDirective();
1446     return true;
1447   }
1448 
1449   SmallString<64> IntegerBuffer;
1450   IntegerBuffer.resize(DigitTok.getLength());
1451   const char *DigitTokBegin = &IntegerBuffer[0];
1452   bool Invalid = false;
1453   unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1454   if (Invalid)
1455     return true;
1456 
1457   // Verify that we have a simple digit-sequence, and compute the value.  This
1458   // is always a simple digit string computed in decimal, so we do this manually
1459   // here.
1460   Val = 0;
1461   for (unsigned i = 0; i != ActualLength; ++i) {
1462     // C++1y [lex.fcon]p1:
1463     //   Optional separating single quotes in a digit-sequence are ignored
1464     if (DigitTokBegin[i] == '\'')
1465       continue;
1466 
1467     if (!isDigit(DigitTokBegin[i])) {
1468       PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1469               diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1470       PP.DiscardUntilEndOfDirective();
1471       return true;
1472     }
1473 
1474     unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1475     if (NextVal < Val) { // overflow.
1476       PP.Diag(DigitTok, DiagID);
1477       PP.DiscardUntilEndOfDirective();
1478       return true;
1479     }
1480     Val = NextVal;
1481   }
1482 
1483   if (DigitTokBegin[0] == '0' && Val)
1484     PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1485       << IsGNULineDirective;
1486 
1487   return false;
1488 }
1489 
1490 /// Handle a \#line directive: C99 6.10.4.
1491 ///
1492 /// The two acceptable forms are:
1493 /// \verbatim
1494 ///   # line digit-sequence
1495 ///   # line digit-sequence "s-char-sequence"
1496 /// \endverbatim
1497 void Preprocessor::HandleLineDirective() {
1498   // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1499   // expanded.
1500   Token DigitTok;
1501   Lex(DigitTok);
1502 
1503   // Validate the number and convert it to an unsigned.
1504   unsigned LineNo;
1505   if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1506     return;
1507 
1508   if (LineNo == 0)
1509     Diag(DigitTok, diag::ext_pp_line_zero);
1510 
1511   // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1512   // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1513   unsigned LineLimit = 32768U;
1514   if (LangOpts.C99 || LangOpts.CPlusPlus11)
1515     LineLimit = 2147483648U;
1516   if (LineNo >= LineLimit)
1517     Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1518   else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1519     Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1520 
1521   int FilenameID = -1;
1522   Token StrTok;
1523   Lex(StrTok);
1524 
1525   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1526   // string followed by eod.
1527   if (StrTok.is(tok::eod))
1528     ; // ok
1529   else if (StrTok.isNot(tok::string_literal)) {
1530     Diag(StrTok, diag::err_pp_line_invalid_filename);
1531     DiscardUntilEndOfDirective();
1532     return;
1533   } else if (StrTok.hasUDSuffix()) {
1534     Diag(StrTok, diag::err_invalid_string_udl);
1535     DiscardUntilEndOfDirective();
1536     return;
1537   } else {
1538     // Parse and validate the string, converting it into a unique ID.
1539     StringLiteralParser Literal(StrTok, *this);
1540     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1541     if (Literal.hadError) {
1542       DiscardUntilEndOfDirective();
1543       return;
1544     }
1545     if (Literal.Pascal) {
1546       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1547       DiscardUntilEndOfDirective();
1548       return;
1549     }
1550     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1551 
1552     // Verify that there is nothing after the string, other than EOD.  Because
1553     // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1554     CheckEndOfDirective("line", true);
1555   }
1556 
1557   // Take the file kind of the file containing the #line directive. #line
1558   // directives are often used for generated sources from the same codebase, so
1559   // the new file should generally be classified the same way as the current
1560   // file. This is visible in GCC's pre-processed output, which rewrites #line
1561   // to GNU line markers.
1562   SrcMgr::CharacteristicKind FileKind =
1563       SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1564 
1565   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1566                         false, FileKind);
1567 
1568   if (Callbacks)
1569     Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1570                            PPCallbacks::RenameFile, FileKind);
1571 }
1572 
1573 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1574 /// marker directive.
1575 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1576                                 SrcMgr::CharacteristicKind &FileKind,
1577                                 Preprocessor &PP) {
1578   unsigned FlagVal;
1579   Token FlagTok;
1580   PP.Lex(FlagTok);
1581   if (FlagTok.is(tok::eod)) return false;
1582   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1583     return true;
1584 
1585   if (FlagVal == 1) {
1586     IsFileEntry = true;
1587 
1588     PP.Lex(FlagTok);
1589     if (FlagTok.is(tok::eod)) return false;
1590     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1591       return true;
1592   } else if (FlagVal == 2) {
1593     IsFileExit = true;
1594 
1595     SourceManager &SM = PP.getSourceManager();
1596     // If we are leaving the current presumed file, check to make sure the
1597     // presumed include stack isn't empty!
1598     FileID CurFileID =
1599       SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1600     PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1601     if (PLoc.isInvalid())
1602       return true;
1603 
1604     // If there is no include loc (main file) or if the include loc is in a
1605     // different physical file, then we aren't in a "1" line marker flag region.
1606     SourceLocation IncLoc = PLoc.getIncludeLoc();
1607     if (IncLoc.isInvalid() ||
1608         SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1609       PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1610       PP.DiscardUntilEndOfDirective();
1611       return true;
1612     }
1613 
1614     PP.Lex(FlagTok);
1615     if (FlagTok.is(tok::eod)) return false;
1616     if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1617       return true;
1618   }
1619 
1620   // We must have 3 if there are still flags.
1621   if (FlagVal != 3) {
1622     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1623     PP.DiscardUntilEndOfDirective();
1624     return true;
1625   }
1626 
1627   FileKind = SrcMgr::C_System;
1628 
1629   PP.Lex(FlagTok);
1630   if (FlagTok.is(tok::eod)) return false;
1631   if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1632     return true;
1633 
1634   // We must have 4 if there is yet another flag.
1635   if (FlagVal != 4) {
1636     PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1637     PP.DiscardUntilEndOfDirective();
1638     return true;
1639   }
1640 
1641   FileKind = SrcMgr::C_ExternCSystem;
1642 
1643   PP.Lex(FlagTok);
1644   if (FlagTok.is(tok::eod)) return false;
1645 
1646   // There are no more valid flags here.
1647   PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1648   PP.DiscardUntilEndOfDirective();
1649   return true;
1650 }
1651 
1652 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1653 /// one of the following forms:
1654 ///
1655 ///     # 42
1656 ///     # 42 "file" ('1' | '2')?
1657 ///     # 42 "file" ('1' | '2')? '3' '4'?
1658 ///
1659 void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1660   // Validate the number and convert it to an unsigned.  GNU does not have a
1661   // line # limit other than it fit in 32-bits.
1662   unsigned LineNo;
1663   if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1664                    *this, true))
1665     return;
1666 
1667   Token StrTok;
1668   Lex(StrTok);
1669 
1670   bool IsFileEntry = false, IsFileExit = false;
1671   int FilenameID = -1;
1672   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1673 
1674   // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1675   // string followed by eod.
1676   if (StrTok.is(tok::eod)) {
1677     Diag(StrTok, diag::ext_pp_gnu_line_directive);
1678     // Treat this like "#line NN", which doesn't change file characteristics.
1679     FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1680   } else if (StrTok.isNot(tok::string_literal)) {
1681     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1682     DiscardUntilEndOfDirective();
1683     return;
1684   } else if (StrTok.hasUDSuffix()) {
1685     Diag(StrTok, diag::err_invalid_string_udl);
1686     DiscardUntilEndOfDirective();
1687     return;
1688   } else {
1689     // Parse and validate the string, converting it into a unique ID.
1690     StringLiteralParser Literal(StrTok, *this);
1691     assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1692     if (Literal.hadError) {
1693       DiscardUntilEndOfDirective();
1694       return;
1695     }
1696     if (Literal.Pascal) {
1697       Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1698       DiscardUntilEndOfDirective();
1699       return;
1700     }
1701 
1702     // If a filename was present, read any flags that are present.
1703     if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1704       return;
1705     if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&
1706         !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))
1707       Diag(StrTok, diag::ext_pp_gnu_line_directive);
1708 
1709     // Exiting to an empty string means pop to the including file, so leave
1710     // FilenameID as -1 in that case.
1711     if (!(IsFileExit && Literal.GetString().empty()))
1712       FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1713   }
1714 
1715   // Create a line note with this information.
1716   SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1717                         IsFileExit, FileKind);
1718 
1719   // If the preprocessor has callbacks installed, notify them of the #line
1720   // change.  This is used so that the line marker comes out in -E mode for
1721   // example.
1722   if (Callbacks) {
1723     PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1724     if (IsFileEntry)
1725       Reason = PPCallbacks::EnterFile;
1726     else if (IsFileExit)
1727       Reason = PPCallbacks::ExitFile;
1728 
1729     Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1730   }
1731 }
1732 
1733 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1734 ///
1735 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1736                                                  bool isWarning) {
1737   // Read the rest of the line raw.  We do this because we don't want macros
1738   // to be expanded and we don't require that the tokens be valid preprocessing
1739   // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1740   // collapse multiple consecutive white space between tokens, but this isn't
1741   // specified by the standard.
1742   SmallString<128> Message;
1743   CurLexer->ReadToEndOfLine(&Message);
1744 
1745   // Find the first non-whitespace character, so that we can make the
1746   // diagnostic more succinct.
1747   StringRef Msg = Message.str().ltrim(' ');
1748 
1749   if (isWarning)
1750     Diag(Tok, diag::pp_hash_warning) << Msg;
1751   else
1752     Diag(Tok, diag::err_pp_hash_error) << Msg;
1753 }
1754 
1755 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1756 ///
1757 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1758   // Yes, this directive is an extension.
1759   Diag(Tok, diag::ext_pp_ident_directive);
1760 
1761   // Read the string argument.
1762   Token StrTok;
1763   Lex(StrTok);
1764 
1765   // If the token kind isn't a string, it's a malformed directive.
1766   if (StrTok.isNot(tok::string_literal) &&
1767       StrTok.isNot(tok::wide_string_literal)) {
1768     Diag(StrTok, diag::err_pp_malformed_ident);
1769     if (StrTok.isNot(tok::eod))
1770       DiscardUntilEndOfDirective();
1771     return;
1772   }
1773 
1774   if (StrTok.hasUDSuffix()) {
1775     Diag(StrTok, diag::err_invalid_string_udl);
1776     DiscardUntilEndOfDirective();
1777     return;
1778   }
1779 
1780   // Verify that there is nothing after the string, other than EOD.
1781   CheckEndOfDirective("ident");
1782 
1783   if (Callbacks) {
1784     bool Invalid = false;
1785     std::string Str = getSpelling(StrTok, &Invalid);
1786     if (!Invalid)
1787       Callbacks->Ident(Tok.getLocation(), Str);
1788   }
1789 }
1790 
1791 /// Handle a #public directive.
1792 void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1793   Token MacroNameTok;
1794   ReadMacroName(MacroNameTok, MU_Undef);
1795 
1796   // Error reading macro name?  If so, diagnostic already issued.
1797   if (MacroNameTok.is(tok::eod))
1798     return;
1799 
1800   // Check to see if this is the last token on the #__public_macro line.
1801   CheckEndOfDirective("__public_macro");
1802 
1803   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1804   // Okay, we finally have a valid identifier to undef.
1805   MacroDirective *MD = getLocalMacroDirective(II);
1806 
1807   // If the macro is not defined, this is an error.
1808   if (!MD) {
1809     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1810     return;
1811   }
1812 
1813   // Note that this macro has now been exported.
1814   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1815                                 MacroNameTok.getLocation(), /*isPublic=*/true));
1816 }
1817 
1818 /// Handle a #private directive.
1819 void Preprocessor::HandleMacroPrivateDirective() {
1820   Token MacroNameTok;
1821   ReadMacroName(MacroNameTok, MU_Undef);
1822 
1823   // Error reading macro name?  If so, diagnostic already issued.
1824   if (MacroNameTok.is(tok::eod))
1825     return;
1826 
1827   // Check to see if this is the last token on the #__private_macro line.
1828   CheckEndOfDirective("__private_macro");
1829 
1830   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1831   // Okay, we finally have a valid identifier to undef.
1832   MacroDirective *MD = getLocalMacroDirective(II);
1833 
1834   // If the macro is not defined, this is an error.
1835   if (!MD) {
1836     Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1837     return;
1838   }
1839 
1840   // Note that this macro has now been marked private.
1841   appendMacroDirective(II, AllocateVisibilityMacroDirective(
1842                                MacroNameTok.getLocation(), /*isPublic=*/false));
1843 }
1844 
1845 //===----------------------------------------------------------------------===//
1846 // Preprocessor Include Directive Handling.
1847 //===----------------------------------------------------------------------===//
1848 
1849 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1850 /// checked and spelled filename, e.g. as an operand of \#include. This returns
1851 /// true if the input filename was in <>'s or false if it were in ""'s.  The
1852 /// caller is expected to provide a buffer that is large enough to hold the
1853 /// spelling of the filename, but is also expected to handle the case when
1854 /// this method decides to use a different buffer.
1855 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1856                                               StringRef &Buffer) {
1857   // Get the text form of the filename.
1858   assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1859 
1860   // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1861   // C++20 [lex.header]/2:
1862   //
1863   // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1864   //   in C: behavior is undefined
1865   //   in C++: program is conditionally-supported with implementation-defined
1866   //           semantics
1867 
1868   // Make sure the filename is <x> or "x".
1869   bool isAngled;
1870   if (Buffer[0] == '<') {
1871     if (Buffer.back() != '>') {
1872       Diag(Loc, diag::err_pp_expects_filename);
1873       Buffer = StringRef();
1874       return true;
1875     }
1876     isAngled = true;
1877   } else if (Buffer[0] == '"') {
1878     if (Buffer.back() != '"') {
1879       Diag(Loc, diag::err_pp_expects_filename);
1880       Buffer = StringRef();
1881       return true;
1882     }
1883     isAngled = false;
1884   } else {
1885     Diag(Loc, diag::err_pp_expects_filename);
1886     Buffer = StringRef();
1887     return true;
1888   }
1889 
1890   // Diagnose #include "" as invalid.
1891   if (Buffer.size() <= 2) {
1892     Diag(Loc, diag::err_pp_empty_filename);
1893     Buffer = StringRef();
1894     return true;
1895   }
1896 
1897   // Skip the brackets.
1898   Buffer = Buffer.substr(1, Buffer.size()-2);
1899   return isAngled;
1900 }
1901 
1902 /// Push a token onto the token stream containing an annotation.
1903 void Preprocessor::EnterAnnotationToken(SourceRange Range,
1904                                         tok::TokenKind Kind,
1905                                         void *AnnotationVal) {
1906   // FIXME: Produce this as the current token directly, rather than
1907   // allocating a new token for it.
1908   auto Tok = std::make_unique<Token[]>(1);
1909   Tok[0].startToken();
1910   Tok[0].setKind(Kind);
1911   Tok[0].setLocation(Range.getBegin());
1912   Tok[0].setAnnotationEndLoc(Range.getEnd());
1913   Tok[0].setAnnotationValue(AnnotationVal);
1914   EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1915 }
1916 
1917 /// Produce a diagnostic informing the user that a #include or similar
1918 /// was implicitly treated as a module import.
1919 static void diagnoseAutoModuleImport(
1920     Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1921     ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1922     SourceLocation PathEnd) {
1923   SmallString<128> PathString;
1924   for (size_t I = 0, N = Path.size(); I != N; ++I) {
1925     if (I)
1926       PathString += '.';
1927     PathString += Path[I].first->getName();
1928   }
1929 
1930   int IncludeKind = 0;
1931   switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1932   case tok::pp_include:
1933     IncludeKind = 0;
1934     break;
1935 
1936   case tok::pp_import:
1937     IncludeKind = 1;
1938     break;
1939 
1940   case tok::pp_include_next:
1941     IncludeKind = 2;
1942     break;
1943 
1944   case tok::pp___include_macros:
1945     IncludeKind = 3;
1946     break;
1947 
1948   default:
1949     llvm_unreachable("unknown include directive kind");
1950   }
1951 
1952   PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1953       << IncludeKind << PathString;
1954 }
1955 
1956 // Given a vector of path components and a string containing the real
1957 // path to the file, build a properly-cased replacement in the vector,
1958 // and return true if the replacement should be suggested.
1959 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1960                             StringRef RealPathName,
1961                             llvm::sys::path::Style Separator) {
1962   auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1963   auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1964   int Cnt = 0;
1965   bool SuggestReplacement = false;
1966 
1967   auto IsSep = [Separator](StringRef Component) {
1968     return Component.size() == 1 &&
1969            llvm::sys::path::is_separator(Component[0], Separator);
1970   };
1971 
1972   // Below is a best-effort to handle ".." in paths. It is admittedly
1973   // not 100% correct in the presence of symlinks.
1974   for (auto &Component : llvm::reverse(Components)) {
1975     if ("." == Component) {
1976     } else if (".." == Component) {
1977       ++Cnt;
1978     } else if (Cnt) {
1979       --Cnt;
1980     } else if (RealPathComponentIter != RealPathComponentEnd) {
1981       if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1982           Component != *RealPathComponentIter) {
1983         // If these non-separator path components differ by more than just case,
1984         // then we may be looking at symlinked paths. Bail on this diagnostic to
1985         // avoid noisy false positives.
1986         SuggestReplacement =
1987             RealPathComponentIter->equals_insensitive(Component);
1988         if (!SuggestReplacement)
1989           break;
1990         Component = *RealPathComponentIter;
1991       }
1992       ++RealPathComponentIter;
1993     }
1994   }
1995   return SuggestReplacement;
1996 }
1997 
1998 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1999                                           const TargetInfo &TargetInfo,
2000                                           const Module &M,
2001                                           DiagnosticsEngine &Diags) {
2002   Module::Requirement Requirement;
2003   Module::UnresolvedHeaderDirective MissingHeader;
2004   Module *ShadowingModule = nullptr;
2005   if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
2006                     ShadowingModule))
2007     return false;
2008 
2009   if (MissingHeader.FileNameLoc.isValid()) {
2010     Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
2011         << MissingHeader.IsUmbrella << MissingHeader.FileName;
2012   } else if (ShadowingModule) {
2013     Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
2014     Diags.Report(ShadowingModule->DefinitionLoc,
2015                  diag::note_previous_definition);
2016   } else {
2017     // FIXME: Track the location at which the requirement was specified, and
2018     // use it here.
2019     Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
2020         << M.getFullModuleName() << Requirement.RequiredState
2021         << Requirement.FeatureName;
2022   }
2023   return true;
2024 }
2025 
2026 std::pair<ConstSearchDirIterator, const FileEntry *>
2027 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2028   // #include_next is like #include, except that we start searching after
2029   // the current found directory.  If we can't do this, issue a
2030   // diagnostic.
2031   ConstSearchDirIterator Lookup = CurDirLookup;
2032   const FileEntry *LookupFromFile = nullptr;
2033 
2034   if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2035     // If the main file is a header, then it's either for PCH/AST generation,
2036     // or libclang opened it. Either way, handle it as a normal include below
2037     // and do not complain about include_next.
2038   } else if (isInPrimaryFile()) {
2039     Lookup = nullptr;
2040     Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2041   } else if (CurLexerSubmodule) {
2042     // Start looking up in the directory *after* the one in which the current
2043     // file would be found, if any.
2044     assert(CurPPLexer && "#include_next directive in macro?");
2045     if (auto FE = CurPPLexer->getFileEntry())
2046       LookupFromFile = *FE;
2047     Lookup = nullptr;
2048   } else if (!Lookup) {
2049     // The current file was not found by walking the include path. Either it
2050     // is the primary file (handled above), or it was found by absolute path,
2051     // or it was found relative to such a file.
2052     // FIXME: Track enough information so we know which case we're in.
2053     Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2054   } else {
2055     // Start looking up in the next directory.
2056     ++Lookup;
2057   }
2058 
2059   return {Lookup, LookupFromFile};
2060 }
2061 
2062 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
2063 /// the file to be included from the lexer, then include it!  This is a common
2064 /// routine with functionality shared between \#include, \#include_next and
2065 /// \#import.  LookupFrom is set when this is a \#include_next directive, it
2066 /// specifies the file to start searching from.
2067 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2068                                           Token &IncludeTok,
2069                                           ConstSearchDirIterator LookupFrom,
2070                                           const FileEntry *LookupFromFile) {
2071   Token FilenameTok;
2072   if (LexHeaderName(FilenameTok))
2073     return;
2074 
2075   if (FilenameTok.isNot(tok::header_name)) {
2076     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
2077     if (FilenameTok.isNot(tok::eod))
2078       DiscardUntilEndOfDirective();
2079     return;
2080   }
2081 
2082   // Verify that there is nothing after the filename, other than EOD.  Note
2083   // that we allow macros that expand to nothing after the filename, because
2084   // this falls into the category of "#include pp-tokens new-line" specified
2085   // in C99 6.10.2p4.
2086   SourceLocation EndLoc =
2087       CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
2088 
2089   auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2090                                             EndLoc, LookupFrom, LookupFromFile);
2091   switch (Action.Kind) {
2092   case ImportAction::None:
2093   case ImportAction::SkippedModuleImport:
2094     break;
2095   case ImportAction::ModuleBegin:
2096     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2097                          tok::annot_module_begin, Action.ModuleForHeader);
2098     break;
2099   case ImportAction::HeaderUnitImport:
2100     EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
2101                          Action.ModuleForHeader);
2102     break;
2103   case ImportAction::ModuleImport:
2104     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2105                          tok::annot_module_include, Action.ModuleForHeader);
2106     break;
2107   case ImportAction::Failure:
2108     assert(TheModuleLoader.HadFatalFailure &&
2109            "This should be an early exit only to a fatal error");
2110     TheModuleLoader.HadFatalFailure = true;
2111     IncludeTok.setKind(tok::eof);
2112     CurLexer->cutOffLexing();
2113     return;
2114   }
2115 }
2116 
2117 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
2118     ConstSearchDirIterator *CurDir, StringRef &Filename,
2119     SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2120     const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2121     bool &IsMapped, ConstSearchDirIterator LookupFrom,
2122     const FileEntry *LookupFromFile, StringRef &LookupFilename,
2123     SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2124     ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
2125   auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
2126     if (LangOpts.AsmPreprocessor)
2127       return;
2128 
2129     Module *RequestingModule = getModuleForLocation(
2130         FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
2131     bool RequestingModuleIsModuleInterface =
2132         !SourceMgr.isInMainFile(FilenameLoc);
2133 
2134     HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
2135         RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
2136         Filename, FE);
2137   };
2138 
2139   OptionalFileEntryRef File = LookupFile(
2140       FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,
2141       Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2142       &SuggestedModule, &IsMapped, &IsFrameworkFound);
2143   if (File) {
2144     DiagnoseHeaderInclusion(*File);
2145     return File;
2146   }
2147 
2148   // Give the clients a chance to silently skip this include.
2149   if (Callbacks && Callbacks->FileNotFound(Filename))
2150     return std::nullopt;
2151 
2152   if (SuppressIncludeNotFoundError)
2153     return std::nullopt;
2154 
2155   // If the file could not be located and it was included via angle
2156   // brackets, we can attempt a lookup as though it were a quoted path to
2157   // provide the user with a possible fixit.
2158   if (isAngled) {
2159     OptionalFileEntryRef File = LookupFile(
2160         FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,
2161         Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
2162         &SuggestedModule, &IsMapped,
2163         /*IsFrameworkFound=*/nullptr);
2164     if (File) {
2165       DiagnoseHeaderInclusion(*File);
2166       Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
2167           << Filename << IsImportDecl
2168           << FixItHint::CreateReplacement(FilenameRange,
2169                                           "\"" + Filename.str() + "\"");
2170       return File;
2171     }
2172   }
2173 
2174   // Check for likely typos due to leading or trailing non-isAlphanumeric
2175   // characters
2176   StringRef OriginalFilename = Filename;
2177   if (LangOpts.SpellChecking) {
2178     // A heuristic to correct a typo file name by removing leading and
2179     // trailing non-isAlphanumeric characters.
2180     auto CorrectTypoFilename = [](llvm::StringRef Filename) {
2181       Filename = Filename.drop_until(isAlphanumeric);
2182       while (!Filename.empty() && !isAlphanumeric(Filename.back())) {
2183         Filename = Filename.drop_back();
2184       }
2185       return Filename;
2186     };
2187     StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
2188     StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);
2189 
2190     OptionalFileEntryRef File = LookupFile(
2191         FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,
2192         LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,
2193         Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
2194         /*IsFrameworkFound=*/nullptr);
2195     if (File) {
2196       DiagnoseHeaderInclusion(*File);
2197       auto Hint =
2198           isAngled ? FixItHint::CreateReplacement(
2199                          FilenameRange, "<" + TypoCorrectionName.str() + ">")
2200                    : FixItHint::CreateReplacement(
2201                          FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
2202       Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
2203           << OriginalFilename << TypoCorrectionName << Hint;
2204       // We found the file, so set the Filename to the name after typo
2205       // correction.
2206       Filename = TypoCorrectionName;
2207       LookupFilename = TypoCorrectionLookupName;
2208       return File;
2209     }
2210   }
2211 
2212   // If the file is still not found, just go with the vanilla diagnostic
2213   assert(!File && "expected missing file");
2214   Diag(FilenameTok, diag::err_pp_file_not_found)
2215       << OriginalFilename << FilenameRange;
2216   if (IsFrameworkFound) {
2217     size_t SlashPos = OriginalFilename.find('/');
2218     assert(SlashPos != StringRef::npos &&
2219            "Include with framework name should have '/' in the filename");
2220     StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
2221     FrameworkCacheEntry &CacheEntry =
2222         HeaderInfo.LookupFrameworkCache(FrameworkName);
2223     assert(CacheEntry.Directory && "Found framework should be in cache");
2224     Diag(FilenameTok, diag::note_pp_framework_without_header)
2225         << OriginalFilename.substr(SlashPos + 1) << FrameworkName
2226         << CacheEntry.Directory->getName();
2227   }
2228 
2229   return std::nullopt;
2230 }
2231 
2232 /// Handle either a #include-like directive or an import declaration that names
2233 /// a header file.
2234 ///
2235 /// \param HashLoc The location of the '#' token for an include, or
2236 ///        SourceLocation() for an import declaration.
2237 /// \param IncludeTok The include / include_next / import token.
2238 /// \param FilenameTok The header-name token.
2239 /// \param EndLoc The location at which any imported macros become visible.
2240 /// \param LookupFrom For #include_next, the starting directory for the
2241 ///        directory lookup.
2242 /// \param LookupFromFile For #include_next, the starting file for the directory
2243 ///        lookup.
     /// \returns An ImportAction telling the caller what happened: None when
     ///          nothing further is needed (this covers the error paths as well
     ///          as a plain textual entry of the file), ModuleImport or
     ///          SkippedModuleImport (paired with the module) when the directive
     ///          was handled as a module import or skipped, ModuleBegin (paired
     ///          with the module) when a submodule was entered, and Failure when
     ///          no valid FileID could be created for the header.
2244 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2245     SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2246     SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2247     const FileEntry *LookupFromFile) {
2248   SmallString<128> FilenameBuffer;
2249   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
2250   SourceLocation CharEnd = FilenameTok.getEndLoc();
2251
2252   CharSourceRange FilenameRange
2253     = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
       // Remember the token spelling as written. It is used for diagnostics and
       // for the include-alias lookup below, which wants the delimiters still
       // attached; Filename itself is handed to GetIncludeFilenameSpelling.
2254   StringRef OriginalFilename = Filename;
2255   bool isAngled =
2256     GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
2257
2258   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2259   // error.
2260   if (Filename.empty())
2261     return {ImportAction::None};
2262
       // An import declaration is signalled by an invalid HashLoc (see the
       // HashLoc parameter); its diagnostics start at the import token instead
       // of at a '#'.
2263   bool IsImportDecl = HashLoc.isInvalid();
2264   SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2265
2266   // Complain about attempts to #include files in an audit pragma.
2267   if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
2268     Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2269     Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
2270
2271     // Immediately leave the pragma.
2272     PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
2273   }
2274
2275   // Complain about attempts to #include files in an assume-nonnull pragma.
2276   if (PragmaAssumeNonNullLoc.isValid()) {
2277     Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2278     Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2279
2280     // Immediately leave the pragma.
2281     PragmaAssumeNonNullLoc = SourceLocation();
2282   }
2283
2284   if (HeaderInfo.HasIncludeAliasMap()) {
2285     // Map the filename with the brackets still attached.  If the name doesn't
2286     // map to anything, fall back on the filename we've already gotten the
2287     // spelling for.
2288     StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
2289     if (!NewName.empty())
2290       Filename = NewName;
2291   }
2292
2293   // Search include directories.
2294   bool IsMapped = false;
2295   bool IsFrameworkFound = false;
2296   ConstSearchDirIterator CurDir = nullptr;
2297   SmallString<1024> SearchPath;
2298   SmallString<1024> RelativePath;
2299   // We get the raw path only if we have 'Callbacks' to which we later pass
2300   // the path.
2301   ModuleMap::KnownHeader SuggestedModule;
2302   SourceLocation FilenameLoc = FilenameTok.getLocation();
2303   StringRef LookupFilename = Filename;
2304
2305   // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2306   // is unnecessary on Windows since the filesystem there handles backslashes.
2307   SmallString<128> NormalizedPath;
2308   llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2309   if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
2310     NormalizedPath = Filename.str();
2311     llvm::sys::path::native(NormalizedPath);
2312     LookupFilename = NormalizedPath;
         // From here on separators are classified with Windows rules, so the
         // portability check below also recognises the user's backslashes.
2313     BackslashStyle = llvm::sys::path::Style::windows;
2314   }
2315
2316   OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2317       &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2318       IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2319       LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2320
       // While still skipping up to the PCH through header, no file is entered:
       // this include only ends the skipping if it names the through header.
2321   if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2322     if (File && isPCHThroughHeader(&File->getFileEntry()))
2323       SkippingUntilPCHThroughHeader = false;
2324     return {ImportAction::None};
2325   }
2326
2327   // Should we enter the source file? Set to Skip if either the source file is
2328   // known to have no effect beyond its effect on module visibility -- that is,
2329   // if it's got an include guard that is already defined, set to Import if it
2330   // is a modular header we've already built and should import.
2331
2332   // For C++20 Modules
2333   // [cpp.include]/7 If the header identified by the header-name denotes an
2334   // importable header, it is implementation-defined whether the #include
2335   // preprocessing directive is instead replaced by an import directive.
2336   // For this implementation, the translation is permitted when we are parsing
2337   // the Global Module Fragment, and not otherwise (the cases where it would be
2338   // valid to replace an include with an import are highly constrained once in
2339   // named module purview; this choice avoids considerable complexity in
2340   // determining valid cases).
2341
2342   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2343
       // In single-file parse mode no included file is ever entered; this reuses
       // the IncludeLimitReached action to get that effect.
2344   if (PPOpts->SingleFileParseMode)
2345     Action = IncludeLimitReached;
2346
2347   // If we've reached the max allowed include depth, it is usually due to an
2348   // include cycle. Don't enter already processed files again as it can lead to
2349   // reaching the max allowed include depth again.
2350   if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2351       alreadyIncluded(*File))
2352     Action = IncludeLimitReached;
2353
2354   // FIXME: We do not have a good way to disambiguate C++ clang modules from
2355   // C++ standard modules (other than use/non-use of Header Units).
2356
2357   Module *ModuleToImport = SuggestedModule.getModule();
2358
       // Translating the include into an import is only considered when we would
       // otherwise enter a file that was found, a module was suggested for it,
       // and that module is not the one currently being built.
2359   bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2360                                !ModuleToImport->isForBuilding(getLangOpts());
2361
2362   // Maybe a usable Header Unit
2363   bool UsableHeaderUnit = false;
2364   if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2365       ModuleToImport->isHeaderUnit()) {
2366     if (TrackGMFState.inGMF() || IsImportDecl)
2367       UsableHeaderUnit = true;
         // Reaching this branch already implies !IsImportDecl, because the
         // condition above accepts every import declaration.
2368     else if (!IsImportDecl) {
2369       // This is a Header Unit that we do not include-translate
2370       ModuleToImport = nullptr;
2371     }
2372   }
2373   // Maybe a usable clang header module.
2374   bool UsableClangHeaderModule =
2375       (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2376       ModuleToImport && !ModuleToImport->isHeaderUnit();
2377
2378   // Determine whether we should try to import the module for this #include, if
2379   // there is one. Don't do so if precompiled module support is disabled or we
2380   // are processing this module textually (because we're building the module).
2381   if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2382     // If this include corresponds to a module but that module is
2383     // unavailable, diagnose the situation and bail out.
2384     // FIXME: Remove this; loadModule does the same check (but produces
2385     // slightly worse diagnostics).
2386     if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport,
2387                                getDiagnostics())) {
2388       Diag(FilenameTok.getLocation(),
2389            diag::note_implicit_top_level_module_import_here)
2390           << ModuleToImport->getTopLevelModuleName();
2391       return {ImportAction::None};
2392     }
2393
2394     // Compute the module access path corresponding to this module.
2395     // FIXME: Should we have a second loadModule() overload to avoid this
2396     // extra lookup step?
         // The path is collected from the suggested module up through its
         // parents and then reversed, so it starts at the outermost module.
2397     SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2398     for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2399       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
2400                                     FilenameTok.getLocation()));
2401     std::reverse(Path.begin(), Path.end());
2402
2403     // Warn that we're replacing the include/import with a module import.
2404     if (!IsImportDecl)
2405       diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
2406
2407     // Load the module to import its macros. We'll make the declarations
2408     // visible when the parser gets here.
2409     // FIXME: Pass ModuleToImport in here rather than converting it to a path
2410     // and making the module loader convert it back again.
2411     ModuleLoadResult Imported = TheModuleLoader.loadModule(
2412         IncludeTok.getLocation(), Path, Module::Hidden,
2413         /*IsInclusionDirective=*/true);
2414     assert((Imported == nullptr || Imported == ModuleToImport) &&
2415            "the imported module is different than the suggested one");
2416
2417     if (Imported) {
2418       Action = Import;
2419     } else if (Imported.isMissingExpected()) {
2420       markClangModuleAsAffecting(
2421           static_cast<Module *>(Imported)->getTopLevelModule());
2422       // We failed to find a submodule that we assumed would exist (because it
2423       // was in the directory of an umbrella header, for instance), but no
2424       // actual module containing it exists (because the umbrella header is
2425       // incomplete).  Treat this as a textual inclusion.
2426       ModuleToImport = nullptr;
2427     } else if (Imported.isConfigMismatch()) {
2428       // On a configuration mismatch, enter the header textually. We still know
2429       // that it's part of the corresponding module.
2430     } else {
2431       // We hit an error processing the import. Bail out.
2432       if (hadModuleLoaderFatalFailure()) {
2433         // With a fatal failure in the module loader, we abort parsing.
             // The include token is rewritten into an eof token at the end of
             // the current buffer and the lexer is cut off.
2434         Token &Result = IncludeTok;
2435         assert(CurLexer && "#include but no current lexer set!");
2436         Result.startToken();
2437         CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2438         CurLexer->cutOffLexing();
2439       }
2440       return {ImportAction::None};
2441     }
2442   }
2443
2444   // The #included file will be considered to be a system header if either it is
2445   // in a system include directory, or if the #includer is a system include
2446   // header.
2447   SrcMgr::CharacteristicKind FileCharacter =
2448       SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
2449   if (File)
2450     FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);
2451
2452   // If this is a '#import' or an import-declaration, don't re-enter the file.
2453   //
2454   // FIXME: If we have a suggested module for a '#include', and we've already
2455   // visited this file, don't bother entering it again. We know it has no
2456   // further effect.
2457   bool EnterOnce =
2458       IsImportDecl ||
2459       IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2460
2461   bool IsFirstIncludeOfFile = false;
2462
2463   // Ask HeaderInfo if we should enter this #include file.  If not, #including
2464   // this file will have no effect.
2465   if (Action == Enter && File &&
2466       !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
2467                                          getLangOpts().Modules, ModuleToImport,
2468                                          IsFirstIncludeOfFile)) {
2469     // C++ standard modules:
2470     // If we are not in the GMF, then we textually include only
2471     // clang modules:
2472     // Even if we've already preprocessed this header once and know that we
2473     // don't need to see its contents again, we still need to import it if it's
2474     // modular because we might not have imported it from this submodule before.
2475     //
2476     // FIXME: We don't do this when compiling a PCH because the AST
2477     // serialization layer can't cope with it. This means we get local
2478     // submodule visibility semantics wrong in that case.
2479     if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2480       Action = TrackGMFState.inGMF() ? Import : Skip;
2481     else
2482       Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2483   }
2484
2485   // Check for circular inclusion of the main file.
2486   // We can't generate a consistent preamble with regard to the conditional
2487   // stack if the main file is included again as due to the preamble bounds
2488   // some directives (e.g. #endif of a header guard) will never be seen.
2489   // Since this will lead to confusing errors, avoid the inclusion.
2490   if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2491       SourceMgr.isMainFile(File->getFileEntry())) {
2492     Diag(FilenameTok.getLocation(),
2493          diag::err_pp_including_mainfile_in_preamble);
2494     return {ImportAction::None};
2495   }
2496
2497   if (Callbacks && !IsImportDecl) {
2498     // Notify the callback object that we've seen an inclusion directive.
2499     // FIXME: Use a different callback for a pp-import?
2500     Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
2501                                   FilenameRange, File, SearchPath, RelativePath,
2502                                   SuggestedModule.getModule(), Action == Import,
2503                                   FileCharacter);
2504     if (Action == Skip && File)
2505       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2506   }
2507
       // Nothing was found. This check comes after the callback block on
       // purpose: installed callbacks are told about the directive even when
       // the lookup failed.
2508   if (!File)
2509     return {ImportAction::None};
2510
2511   // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2512   // module corresponding to the named header.
2513   if (IsImportDecl && !ModuleToImport) {
2514     Diag(FilenameTok, diag::err_header_import_not_header_unit)
2515       << OriginalFilename << File->getName();
2516     return {ImportAction::None};
2517   }
2518
2519   // Issue a diagnostic if the name of the file on disk has a different case
2520   // than the one we're about to open.
       // The check is skipped when the lookup reported the name as mapped or
       // when no real path is known for the file.
2521   const bool CheckIncludePathPortability =
2522       !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2523
2524   if (CheckIncludePathPortability) {
2525     StringRef Name = LookupFilename;
2526     StringRef NameWithoriginalSlashes = Filename;
2527 #if defined(_WIN32)
2528     // Skip UNC prefix if present. (tryGetRealPathName() always
2529     // returns a path with the prefix skipped.)
2530     bool NameWasUNC = Name.consume_front("\\\\?\\");
2531     NameWithoriginalSlashes.consume_front("\\\\?\\");
2532 #endif
2533     StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2534     SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2535                                           llvm::sys::path::end(Name));
2536 #if defined(_WIN32)
2537     // -Wnonportable-include-path is designed to diagnose includes using
2538     // case even on systems with a case-insensitive file system.
2539     // On Windows, RealPathName always starts with an upper-case drive
2540     // letter for absolute paths, but Name might start with either
2541     // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2542     // ("foo" will always have on-disk case, no matter which case was
2543     // used in the cd command). To not emit this warning solely for
2544     // the drive letter, whose case is dependent on if `cd` is used
2545     // with upper- or lower-case drive letters, always consider the
2546     // given drive letter case as correct for the purpose of this warning.
2547     SmallString<128> FixedDriveRealPath;
2548     if (llvm::sys::path::is_absolute(Name) &&
2549         llvm::sys::path::is_absolute(RealPathName) &&
2550         toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2551         isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2552       assert(Components.size() >= 3 && "should have drive, backslash, name");
2553       assert(Components[0].size() == 2 && "should start with drive");
2554       assert(Components[0][1] == ':' && "should have colon");
2555       FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2556       RealPathName = FixedDriveRealPath;
2557     }
2558 #endif
2559
         // When trySimplifyPath returns true, the (possibly updated) Components
         // are used to rebuild the suggested spelling for the fix-it below,
         // delimiters included.
2560     if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
2561       SmallString<128> Path;
2562       Path.reserve(Name.size()+2);
2563       Path.push_back(isAngled ? '<' : '"');
2564
2565       const auto IsSep = [BackslashStyle](char c) {
2566         return llvm::sys::path::is_separator(c, BackslashStyle);
2567       };
2568
2569       for (auto Component : Components) {
2570         // On POSIX, Components will contain a single '/' as first element
2571         // exactly if Name is an absolute path.
2572         // On Windows, it will contain "C:" followed by '\' for absolute paths.
2573         // The drive letter is optional for absolute paths on Windows, but
2574         // clang currently cannot process absolute paths in #include lines that
2575         // don't have a drive.
2576         // If the first entry in Components is a directory separator,
2577         // then the code at the bottom of this loop that keeps the original
2578         // directory separator style copies it. If the second entry is
2579         // a directory separator (the C:\ case), then that separator already
2580         // got copied when the C: was processed and we want to skip that entry.
2581         if (!(Component.size() == 1 && IsSep(Component[0])))
2582           Path.append(Component);
2583         else if (Path.size() != 1)
2584           continue;
2585
2586         // Append the separator(s) the user used, or the close quote
             // Path carries one leading delimiter character, so Path.size()-1
             // is the matching index into NameWithoriginalSlashes.
2587         if (Path.size() > NameWithoriginalSlashes.size()) {
2588           Path.push_back(isAngled ? '>' : '"');
2589           continue;
2590         }
2591         assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2592         do
2593           Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2594         while (Path.size() <= NameWithoriginalSlashes.size() &&
2595                IsSep(NameWithoriginalSlashes[Path.size()-1]));
2596       }
2597
2598 #if defined(_WIN32)
2599       // Restore UNC prefix if it was there.
2600       if (NameWasUNC)
2601         Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2602 #endif
2603
2604       // For user files and known standard headers, issue a diagnostic.
2605       // For other system headers, don't. They can be controlled separately.
2606       auto DiagId =
2607           (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2608               ? diag::pp_nonportable_path
2609               : diag::pp_nonportable_system_path;
2610       Diag(FilenameTok, DiagId) << Path <<
2611         FixItHint::CreateReplacement(FilenameRange, Path);
2612     }
2613   }
2614
       // Every action except Enter is fully handled, and returns, inside this
       // switch.
2615   switch (Action) {
2616   case Skip:
2617     // If we don't need to enter the file, stop now.
2618     if (ModuleToImport)
2619       return {ImportAction::SkippedModuleImport, ModuleToImport};
2620     return {ImportAction::None};
2621
2622   case IncludeLimitReached:
2623     // If we reached our include limit and don't want to enter any more files,
2624     // don't go any further.
2625     return {ImportAction::None};
2626
2627   case Import: {
2628     // If this is a module import, make it visible if needed.
2629     assert(ModuleToImport && "no module to import");
2630
2631     makeModuleVisible(ModuleToImport, EndLoc);
2632
         // For #__include_macros the module is made visible above, but no
         // import is reported to the caller.
2633     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2634         tok::pp___include_macros)
2635       return {ImportAction::None};
2636
2637     return {ImportAction::ModuleImport, ModuleToImport};
2638   }
2639
2640   case Enter:
2641     break;
2642   }
2643
2644   // Check that we don't have infinite #include recursion.
2645   if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2646     Diag(FilenameTok, diag::err_pp_include_too_deep);
         // Remembered so that later includes of already-included files are not
         // entered again (see the HasReachedMaxIncludeDepth check above).
2647     HasReachedMaxIncludeDepth = true;
2648     return {ImportAction::None};
2649   }
2650
2651   if (isAngled && isInNamedModule())
2652     Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
2653         << getNamedModuleName();
2654
2655   // Look up the file, create a File ID for it.
2656   SourceLocation IncludePos = FilenameTok.getLocation();
2657   // If the filename string was the result of macro expansions, set the include
2658   // position on the file where it will be included and after the expansions.
2659   if (IncludePos.isMacroID())
2660     IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2661   FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2662   if (!FID.isValid()) {
         // No valid FileID could be created for the header: record a fatal
         // failure on the module loader and report Failure to the caller.
2663     TheModuleLoader.HadFatalFailure = true;
2664     return ImportAction::Failure;
2665   }
2666
2667   // If all is good, enter the new file!
       // NOTE(review): a true result ends processing here; presumably that is
       // how EnterSourceFile reports an error -- confirm against its contract.
2668   if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
2669                       IsFirstIncludeOfFile))
2670     return {ImportAction::None};
2671
2672   // Determine if we're switching to building a new submodule, and which one.
2673   // This does not apply for C++20 modules header units.
2674   if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2675     if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2676       // We are building a submodule that belongs to a shadowed module. This
2677       // means we find header files in the shadowed module.
2678       Diag(ModuleToImport->DefinitionLoc,
2679            diag::err_module_build_shadowed_submodule)
2680           << ModuleToImport->getFullModuleName();
2681       Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2682            diag::note_previous_definition);
2683       return {ImportAction::None};
2684     }
2685     // When building a pch, -fmodule-name tells the compiler to textually
2686     // include headers in the specified module. We are not building the
2687     // specified module.
2688     //
2689     // FIXME: This is the wrong way to handle this. We should produce a PCH
2690     // that behaves the same as the header would behave in a compilation using
2691     // that PCH, which means we should enter the submodule. We need to teach
2692     // the AST serialization layer to deal with the resulting AST.
2693     if (getLangOpts().CompilingPCH &&
2694         ModuleToImport->isForBuilding(getLangOpts()))
2695       return {ImportAction::None};
2696
2697     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2698     CurLexerSubmodule = ModuleToImport;
2699
2700     // Let the macro handling code know that any future macros are within
2701     // the new submodule.
2702     EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false);
2703
2704     // Let the parser know that any future declarations are within the new
2705     // submodule.
2706     // FIXME: There's no point doing this if we're handling a #__include_macros
2707     // directive.
2708     return {ImportAction::ModuleBegin, ModuleToImport};
2709   }
2710
2711   assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2712   return {ImportAction::None};
2713 }
2714 
2715 /// HandleIncludeNextDirective - Implements \#include_next.
2716 ///
2717 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2718                                               Token &IncludeNextTok) {
2719   Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2720 
2721   ConstSearchDirIterator Lookup = nullptr;
2722   const FileEntry *LookupFromFile;
2723   std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2724 
2725   return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2726                                 LookupFromFile);
2727 }
2728 
2729 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
2730 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2731   // The Microsoft #import directive takes a type library and generates header
2732   // files from it, and includes those.  This is beyond the scope of what clang
2733   // does, so we ignore it and error out.  However, #import can optionally have
2734   // trailing attributes that span multiple lines.  We're going to eat those
2735   // so we can continue processing from there.
2736   Diag(Tok, diag::err_pp_import_directive_ms );
2737 
2738   // Read tokens until we get to the end of the directive.  Note that the
2739   // directive can be split over multiple lines using the backslash character.
2740   DiscardUntilEndOfDirective();
2741 }
2742 
2743 /// HandleImportDirective - Implements \#import.
2744 ///
2745 void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2746                                          Token &ImportTok) {
2747   if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2748     if (LangOpts.MSVCCompat)
2749       return HandleMicrosoftImportDirective(ImportTok);
2750     Diag(ImportTok, diag::ext_pp_import_directive);
2751   }
2752   return HandleIncludeDirective(HashLoc, ImportTok);
2753 }
2754 
2755 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2756 /// pseudo directive in the predefines buffer.  This handles it by sucking all
2757 /// tokens through the preprocessor and discarding them (only keeping the side
2758 /// effects on the preprocessor).
2759 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2760                                                 Token &IncludeMacrosTok) {
2761   // This directive should only occur in the predefines buffer.  If not, emit an
2762   // error and reject it.
2763   SourceLocation Loc = IncludeMacrosTok.getLocation();
2764   if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2765     Diag(IncludeMacrosTok.getLocation(),
2766          diag::pp_include_macros_out_of_predefines);
2767     DiscardUntilEndOfDirective();
2768     return;
2769   }
2770 
2771   // Treat this as a normal #include for checking purposes.  If this is
2772   // successful, it will push a new lexer onto the include stack.
2773   HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2774 
2775   Token TmpTok;
2776   do {
2777     Lex(TmpTok);
2778     assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2779   } while (TmpTok.isNot(tok::hashhash));
2780 }
2781 
2782 //===----------------------------------------------------------------------===//
2783 // Preprocessor Macro Directive Handling.
2784 //===----------------------------------------------------------------------===//
2785 
2786 /// ReadMacroParameterList - The ( starting a parameter list of a macro
2787 /// definition has just been read.  Lex the rest of the parameters and the
2788 /// closing ), updating MI with what we learn.  Return true if an error occurs
2789 /// parsing the param list.
2790 bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2791   SmallVector<IdentifierInfo*, 32> Parameters;
2792 
2793   while (true) {
2794     LexUnexpandedNonComment(Tok);
2795     switch (Tok.getKind()) {
2796     case tok::r_paren:
2797       // Found the end of the parameter list.
2798       if (Parameters.empty())  // #define FOO()
2799         return false;
2800       // Otherwise we have #define FOO(A,)
2801       Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2802       return true;
2803     case tok::ellipsis:  // #define X(... -> C99 varargs
2804       if (!LangOpts.C99)
2805         Diag(Tok, LangOpts.CPlusPlus11 ?
2806              diag::warn_cxx98_compat_variadic_macro :
2807              diag::ext_variadic_macro);
2808 
2809       // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2810       if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2811         Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2812       }
2813 
2814       // Lex the token after the identifier.
2815       LexUnexpandedNonComment(Tok);
2816       if (Tok.isNot(tok::r_paren)) {
2817         Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2818         return true;
2819       }
2820       // Add the __VA_ARGS__ identifier as a parameter.
2821       Parameters.push_back(Ident__VA_ARGS__);
2822       MI->setIsC99Varargs();
2823       MI->setParameterList(Parameters, BP);
2824       return false;
2825     case tok::eod:  // #define X(
2826       Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2827       return true;
2828     default:
2829       // Handle keywords and identifiers here to accept things like
2830       // #define Foo(for) for.
2831       IdentifierInfo *II = Tok.getIdentifierInfo();
2832       if (!II) {
2833         // #define X(1
2834         Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2835         return true;
2836       }
2837 
2838       // If this is already used as a parameter, it is used multiple times (e.g.
2839       // #define X(A,A.
2840       if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2841         Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2842         return true;
2843       }
2844 
2845       // Add the parameter to the macro info.
2846       Parameters.push_back(II);
2847 
2848       // Lex the token after the identifier.
2849       LexUnexpandedNonComment(Tok);
2850 
2851       switch (Tok.getKind()) {
2852       default:          // #define X(A B
2853         Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2854         return true;
2855       case tok::r_paren: // #define X(A)
2856         MI->setParameterList(Parameters, BP);
2857         return false;
2858       case tok::comma:  // #define X(A,
2859         break;
2860       case tok::ellipsis:  // #define X(A... -> GCC extension
2861         // Diagnose extension.
2862         Diag(Tok, diag::ext_named_variadic_macro);
2863 
2864         // Lex the token after the identifier.
2865         LexUnexpandedNonComment(Tok);
2866         if (Tok.isNot(tok::r_paren)) {
2867           Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2868           return true;
2869         }
2870 
2871         MI->setIsGNUVarargs();
2872         MI->setParameterList(Parameters, BP);
2873         return false;
2874       }
2875     }
2876   }
2877 }
2878 
2879 static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2880                                    const LangOptions &LOptions) {
2881   if (MI->getNumTokens() == 1) {
2882     const Token &Value = MI->getReplacementToken(0);
2883 
2884     // Macro that is identity, like '#define inline inline' is a valid pattern.
2885     if (MacroName.getKind() == Value.getKind())
2886       return true;
2887 
2888     // Macro that maps a keyword to the same keyword decorated with leading/
2889     // trailing underscores is a valid pattern:
2890     //    #define inline __inline
2891     //    #define inline __inline__
2892     //    #define inline _inline (in MS compatibility mode)
2893     StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2894     if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2895       if (!II->isKeyword(LOptions))
2896         return false;
2897       StringRef ValueText = II->getName();
2898       StringRef TrimmedValue = ValueText;
2899       if (!ValueText.starts_with("__")) {
2900         if (ValueText.starts_with("_"))
2901           TrimmedValue = TrimmedValue.drop_front(1);
2902         else
2903           return false;
2904       } else {
2905         TrimmedValue = TrimmedValue.drop_front(2);
2906         if (TrimmedValue.ends_with("__"))
2907           TrimmedValue = TrimmedValue.drop_back(2);
2908       }
2909       return TrimmedValue == MacroText;
2910     } else {
2911       return false;
2912     }
2913   }
2914 
2915   // #define inline
2916   return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2917                            tok::kw_const) &&
2918          MI->getNumTokens() == 0;
2919 }
2920 
2921 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2922 // entire line) of the macro's tokens and adds them to MacroInfo, and while
2923 // doing so performs certain validity checks including (but not limited to):
2924 //   - # (stringization) is followed by a macro parameter
2925 //
2926 //  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2927 //  a pointer to a MacroInfo object.
2928 
2929 MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2930     const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2931 
2932   Token LastTok = MacroNameTok;
2933   // Create the new macro.
2934   MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2935 
2936   Token Tok;
2937   LexUnexpandedToken(Tok);
2938 
2939   // Ensure we consume the rest of the macro body if errors occur.
2940   auto _ = llvm::make_scope_exit([&]() {
2941     // The flag indicates if we are still waiting for 'eod'.
2942     if (CurLexer->ParsingPreprocessorDirective)
2943       DiscardUntilEndOfDirective();
2944   });
2945 
2946   // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2947   // within their appropriate context.
2948   VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2949 
2950   // If this is a function-like macro definition, parse the argument list,
2951   // marking each of the identifiers as being used as macro arguments.  Also,
2952   // check other constraints on the first token of the macro body.
2953   if (Tok.is(tok::eod)) {
2954     if (ImmediatelyAfterHeaderGuard) {
2955       // Save this macro information since it may part of a header guard.
2956       CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2957                                         MacroNameTok.getLocation());
2958     }
2959     // If there is no body to this macro, we have no special handling here.
2960   } else if (Tok.hasLeadingSpace()) {
2961     // This is a normal token with leading space.  Clear the leading space
2962     // marker on the first token to get proper expansion.
2963     Tok.clearFlag(Token::LeadingSpace);
2964   } else if (Tok.is(tok::l_paren)) {
2965     // This is a function-like macro definition.  Read the argument list.
2966     MI->setIsFunctionLike();
2967     if (ReadMacroParameterList(MI, LastTok))
2968       return nullptr;
2969 
2970     // If this is a definition of an ISO C/C++ variadic function-like macro (not
2971     // using the GNU named varargs extension) inform our variadic scope guard
2972     // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2973     // allowed only within the definition of a variadic macro.
2974 
2975     if (MI->isC99Varargs()) {
2976       VariadicMacroScopeGuard.enterScope();
2977     }
2978 
2979     // Read the first token after the arg list for down below.
2980     LexUnexpandedToken(Tok);
2981   } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2982     // C99 requires whitespace between the macro definition and the body.  Emit
2983     // a diagnostic for something like "#define X+".
2984     Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2985   } else {
2986     // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2987     // first character of a replacement list is not a character required by
2988     // subclause 5.2.1, then there shall be white-space separation between the
2989     // identifier and the replacement list.".  5.2.1 lists this set:
2990     //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2991     // is irrelevant here.
2992     bool isInvalid = false;
2993     if (Tok.is(tok::at)) // @ is not in the list above.
2994       isInvalid = true;
2995     else if (Tok.is(tok::unknown)) {
2996       // If we have an unknown token, it is something strange like "`".  Since
2997       // all of valid characters would have lexed into a single character
2998       // token of some sort, we know this is not a valid case.
2999       isInvalid = true;
3000     }
3001     if (isInvalid)
3002       Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
3003     else
3004       Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
3005   }
3006 
3007   if (!Tok.is(tok::eod))
3008     LastTok = Tok;
3009 
3010   SmallVector<Token, 16> Tokens;
3011 
3012   // Read the rest of the macro body.
3013   if (MI->isObjectLike()) {
3014     // Object-like macros are very simple, just read their body.
3015     while (Tok.isNot(tok::eod)) {
3016       LastTok = Tok;
3017       Tokens.push_back(Tok);
3018       // Get the next token of the macro.
3019       LexUnexpandedToken(Tok);
3020     }
3021   } else {
3022     // Otherwise, read the body of a function-like macro.  While we are at it,
3023     // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3024     // parameters in function-like macro expansions.
3025 
3026     VAOptDefinitionContext VAOCtx(*this);
3027 
3028     while (Tok.isNot(tok::eod)) {
3029       LastTok = Tok;
3030 
3031       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
3032         Tokens.push_back(Tok);
3033 
3034         if (VAOCtx.isVAOptToken(Tok)) {
3035           // If we're already within a VAOPT, emit an error.
3036           if (VAOCtx.isInVAOpt()) {
3037             Diag(Tok, diag::err_pp_vaopt_nested_use);
3038             return nullptr;
3039           }
3040           // Ensure VAOPT is followed by a '(' .
3041           LexUnexpandedToken(Tok);
3042           if (Tok.isNot(tok::l_paren)) {
3043             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
3044             return nullptr;
3045           }
3046           Tokens.push_back(Tok);
3047           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
3048           LexUnexpandedToken(Tok);
3049           if (Tok.is(tok::hashhash)) {
3050             Diag(Tok, diag::err_vaopt_paste_at_start);
3051             return nullptr;
3052           }
3053           continue;
3054         } else if (VAOCtx.isInVAOpt()) {
3055           if (Tok.is(tok::r_paren)) {
3056             if (VAOCtx.sawClosingParen()) {
3057               assert(Tokens.size() >= 3 &&
3058                      "Must have seen at least __VA_OPT__( "
3059                      "and a subsequent tok::r_paren");
3060               if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
3061                 Diag(Tok, diag::err_vaopt_paste_at_end);
3062                 return nullptr;
3063               }
3064             }
3065           } else if (Tok.is(tok::l_paren)) {
3066             VAOCtx.sawOpeningParen(Tok.getLocation());
3067           }
3068         }
3069         // Get the next token of the macro.
3070         LexUnexpandedToken(Tok);
3071         continue;
3072       }
3073 
3074       // If we're in -traditional mode, then we should ignore stringification
3075       // and token pasting. Mark the tokens as unknown so as not to confuse
3076       // things.
3077       if (getLangOpts().TraditionalCPP) {
3078         Tok.setKind(tok::unknown);
3079         Tokens.push_back(Tok);
3080 
3081         // Get the next token of the macro.
3082         LexUnexpandedToken(Tok);
3083         continue;
3084       }
3085 
3086       if (Tok.is(tok::hashhash)) {
3087         // If we see token pasting, check if it looks like the gcc comma
3088         // pasting extension.  We'll use this information to suppress
3089         // diagnostics later on.
3090 
3091         // Get the next token of the macro.
3092         LexUnexpandedToken(Tok);
3093 
3094         if (Tok.is(tok::eod)) {
3095           Tokens.push_back(LastTok);
3096           break;
3097         }
3098 
3099         if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3100             Tokens[Tokens.size() - 1].is(tok::comma))
3101           MI->setHasCommaPasting();
3102 
3103         // Things look ok, add the '##' token to the macro.
3104         Tokens.push_back(LastTok);
3105         continue;
3106       }
3107 
3108       // Our Token is a stringization operator.
3109       // Get the next token of the macro.
3110       LexUnexpandedToken(Tok);
3111 
3112       // Check for a valid macro arg identifier or __VA_OPT__.
3113       if (!VAOCtx.isVAOptToken(Tok) &&
3114           (Tok.getIdentifierInfo() == nullptr ||
3115            MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
3116 
3117         // If this is assembler-with-cpp mode, we accept random gibberish after
3118         // the '#' because '#' is often a comment character.  However, change
3119         // the kind of the token to tok::unknown so that the preprocessor isn't
3120         // confused.
3121         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
3122           LastTok.setKind(tok::unknown);
3123           Tokens.push_back(LastTok);
3124           continue;
3125         } else {
3126           Diag(Tok, diag::err_pp_stringize_not_parameter)
3127             << LastTok.is(tok::hashat);
3128           return nullptr;
3129         }
3130       }
3131 
3132       // Things look ok, add the '#' and param name tokens to the macro.
3133       Tokens.push_back(LastTok);
3134 
3135       // If the token following '#' is VAOPT, let the next iteration handle it
3136       // and check it for correctness, otherwise add the token and prime the
3137       // loop with the next one.
3138       if (!VAOCtx.isVAOptToken(Tok)) {
3139         Tokens.push_back(Tok);
3140         LastTok = Tok;
3141 
3142         // Get the next token of the macro.
3143         LexUnexpandedToken(Tok);
3144       }
3145     }
3146     if (VAOCtx.isInVAOpt()) {
3147       assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3148       Diag(Tok, diag::err_pp_expected_after)
3149         << LastTok.getKind() << tok::r_paren;
3150       Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3151       return nullptr;
3152     }
3153   }
3154   MI->setDefinitionEndLoc(LastTok.getLocation());
3155 
3156   MI->setTokens(Tokens, BP);
3157   return MI;
3158 }
3159 
3160 static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3161   return II->isStr("__strong") || II->isStr("__weak") ||
3162          II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
3163 }
3164 
3165 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
3166 /// line then lets the caller lex the next real token.
3167 void Preprocessor::HandleDefineDirective(
3168     Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3169   ++NumDefined;
3170 
3171   Token MacroNameTok;
3172   bool MacroShadowsKeyword;
3173   ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3174 
3175   // Error reading macro name?  If so, diagnostic already issued.
3176   if (MacroNameTok.is(tok::eod))
3177     return;
3178 
3179   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3180   // Issue a final pragma warning if we're defining a macro that was has been
3181   // undefined and is being redefined.
3182   if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3183     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3184 
3185   // If we are supposed to keep comments in #defines, reenable comment saving
3186   // mode.
3187   if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3188 
3189   MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3190       MacroNameTok, ImmediatelyAfterHeaderGuard);
3191 
3192   if (!MI) return;
3193 
3194   if (MacroShadowsKeyword &&
3195       !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3196     Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3197   }
3198   // Check that there is no paste (##) operator at the beginning or end of the
3199   // replacement list.
3200   unsigned NumTokens = MI->getNumTokens();
3201   if (NumTokens != 0) {
3202     if (MI->getReplacementToken(0).is(tok::hashhash)) {
3203       Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3204       return;
3205     }
3206     if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3207       Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3208       return;
3209     }
3210   }
3211 
3212   // When skipping just warn about macros that do not match.
3213   if (SkippingUntilPCHThroughHeader) {
3214     const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3215     if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3216                              /*Syntactic=*/LangOpts.MicrosoftExt))
3217       Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3218           << MacroNameTok.getIdentifierInfo();
3219     // Issue the diagnostic but allow the change if msvc extensions are enabled
3220     if (!LangOpts.MicrosoftExt)
3221       return;
3222   }
3223 
3224   // Finally, if this identifier already had a macro defined for it, verify that
3225   // the macro bodies are identical, and issue diagnostics if they are not.
3226   if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3227     // Final macros are hard-mode: they always warn. Even if the bodies are
3228     // identical. Even if they are in system headers. Even if they are things we
3229     // would silently allow in the past.
3230     if (MacroNameTok.getIdentifierInfo()->isFinal())
3231       emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3232 
3233     // In Objective-C, ignore attempts to directly redefine the builtin
3234     // definitions of the ownership qualifiers.  It's still possible to
3235     // #undef them.
3236     if (getLangOpts().ObjC &&
3237         SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
3238             getPredefinesFileID() &&
3239         isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3240       // Warn if it changes the tokens.
3241       if ((!getDiagnostics().getSuppressSystemWarnings() ||
3242            !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3243           !MI->isIdenticalTo(*OtherMI, *this,
3244                              /*Syntactic=*/LangOpts.MicrosoftExt)) {
3245         Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3246       }
3247       assert(!OtherMI->isWarnIfUnused());
3248       return;
3249     }
3250 
3251     // It is very common for system headers to have tons of macro redefinitions
3252     // and for warnings to be disabled in system headers.  If this is the case,
3253     // then don't bother calling MacroInfo::isIdenticalTo.
3254     if (!getDiagnostics().getSuppressSystemWarnings() ||
3255         !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3256 
3257       if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3258         Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3259 
3260       // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3261       // C++ [cpp.predefined]p4, but allow it as an extension.
3262       if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
3263         Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3264       // Macros must be identical.  This means all tokens and whitespace
3265       // separation must be the same.  C99 6.10.3p2.
3266       else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3267                !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3268         Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3269           << MacroNameTok.getIdentifierInfo();
3270         Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3271       }
3272     }
3273     if (OtherMI->isWarnIfUnused())
3274       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3275   }
3276 
3277   DefMacroDirective *MD =
3278       appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3279 
3280   assert(!MI->isUsed());
3281   // If we need warning for not using the macro, add its location in the
3282   // warn-because-unused-macro set. If it gets used it will be removed from set.
3283   if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3284       !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3285       !MacroExpansionInDirectivesOverride &&
3286       getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3287           getPredefinesFileID()) {
3288     MI->setIsWarnIfUnused(true);
3289     WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3290   }
3291 
3292   // If the callbacks want to know, tell them about the macro definition.
3293   if (Callbacks)
3294     Callbacks->MacroDefined(MacroNameTok, MD);
3295 
3296   // If we're in MS compatibility mode and the macro being defined is the
3297   // assert macro, implicitly add a macro definition for static_assert to work
3298   // around their broken assert.h header file in C. Only do so if there isn't
3299   // already a static_assert macro defined.
3300   if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3301       MacroNameTok.getIdentifierInfo()->isStr("assert") &&
3302       !isMacroDefined("static_assert")) {
3303     MacroInfo *MI = AllocateMacroInfo(SourceLocation());
3304 
3305     Token Tok;
3306     Tok.startToken();
3307     Tok.setKind(tok::kw__Static_assert);
3308     Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
3309     MI->setTokens({Tok}, BP);
3310     (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
3311   }
3312 }
3313 
3314 /// HandleUndefDirective - Implements \#undef.
3315 ///
3316 void Preprocessor::HandleUndefDirective() {
3317   ++NumUndefined;
3318 
3319   Token MacroNameTok;
3320   ReadMacroName(MacroNameTok, MU_Undef);
3321 
3322   // Error reading macro name?  If so, diagnostic already issued.
3323   if (MacroNameTok.is(tok::eod))
3324     return;
3325 
3326   // Check to see if this is the last token on the #undef line.
3327   CheckEndOfDirective("undef");
3328 
3329   // Okay, we have a valid identifier to undef.
3330   auto *II = MacroNameTok.getIdentifierInfo();
3331   auto MD = getMacroDefinition(II);
3332   UndefMacroDirective *Undef = nullptr;
3333 
3334   if (II->isFinal())
3335     emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3336 
3337   // If the macro is not defined, this is a noop undef.
3338   if (const MacroInfo *MI = MD.getMacroInfo()) {
3339     if (!MI->isUsed() && MI->isWarnIfUnused())
3340       Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3341 
3342     // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3343     // C++ [cpp.predefined]p4, but allow it as an extension.
3344     if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3345       Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3346 
3347     if (MI->isWarnIfUnused())
3348       WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3349 
3350     Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3351   }
3352 
3353   // If the callbacks want to know, tell them about the macro #undef.
3354   // Note: no matter if the macro was defined or not.
3355   if (Callbacks)
3356     Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3357 
3358   if (Undef)
3359     appendMacroDirective(II, Undef);
3360 }
3361 
3362 //===----------------------------------------------------------------------===//
3363 // Preprocessor Conditional Directive Handling.
3364 //===----------------------------------------------------------------------===//
3365 
3366 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
3367 /// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
3368 /// true if any tokens have been returned or pp-directives activated before this
3369 /// \#ifndef has been lexed.
3370 ///
3371 void Preprocessor::HandleIfdefDirective(Token &Result,
3372                                         const Token &HashToken,
3373                                         bool isIfndef,
3374                                         bool ReadAnyTokensBeforeDirective) {
3375   ++NumIf;
3376   Token DirectiveTok = Result;
3377 
3378   Token MacroNameTok;
3379   ReadMacroName(MacroNameTok);
3380 
3381   // Error reading macro name?  If so, diagnostic already issued.
3382   if (MacroNameTok.is(tok::eod)) {
3383     // Skip code until we get to #endif.  This helps with recovery by not
3384     // emitting an error when the #endif is reached.
3385     SkipExcludedConditionalBlock(HashToken.getLocation(),
3386                                  DirectiveTok.getLocation(),
3387                                  /*Foundnonskip*/ false, /*FoundElse*/ false);
3388     return;
3389   }
3390 
3391   emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
3392 
3393   // Check to see if this is the last token on the #if[n]def line.
3394   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3395 
3396   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3397   auto MD = getMacroDefinition(MII);
3398   MacroInfo *MI = MD.getMacroInfo();
3399 
3400   if (CurPPLexer->getConditionalStackDepth() == 0) {
3401     // If the start of a top-level #ifdef and if the macro is not defined,
3402     // inform MIOpt that this might be the start of a proper include guard.
3403     // Otherwise it is some other form of unknown conditional which we can't
3404     // handle.
3405     if (!ReadAnyTokensBeforeDirective && !MI) {
3406       assert(isIfndef && "#ifdef shouldn't reach here");
3407       CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3408     } else
3409       CurPPLexer->MIOpt.EnterTopLevelConditional();
3410   }
3411 
3412   // If there is a macro, process it.
3413   if (MI)  // Mark it used.
3414     markMacroAsUsed(MI);
3415 
3416   if (Callbacks) {
3417     if (isIfndef)
3418       Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3419     else
3420       Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3421   }
3422 
3423   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3424     getSourceManager().isInMainFile(DirectiveTok.getLocation());
3425 
3426   // Should we include the stuff contained by this directive?
3427   if (PPOpts->SingleFileParseMode && !MI) {
3428     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3429     // the directive blocks.
3430     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3431                                      /*wasskip*/false, /*foundnonskip*/false,
3432                                      /*foundelse*/false);
3433   } else if (!MI == isIfndef || RetainExcludedCB) {
3434     // Yes, remember that we are inside a conditional, then lex the next token.
3435     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3436                                      /*wasskip*/false, /*foundnonskip*/true,
3437                                      /*foundelse*/false);
3438   } else {
3439     // No, skip the contents of this block.
3440     SkipExcludedConditionalBlock(HashToken.getLocation(),
3441                                  DirectiveTok.getLocation(),
3442                                  /*Foundnonskip*/ false,
3443                                  /*FoundElse*/ false);
3444   }
3445 }
3446 
3447 /// HandleIfDirective - Implements the \#if directive.
3448 ///
3449 void Preprocessor::HandleIfDirective(Token &IfToken,
3450                                      const Token &HashToken,
3451                                      bool ReadAnyTokensBeforeDirective) {
3452   ++NumIf;
3453 
3454   // Parse and evaluate the conditional expression.
3455   IdentifierInfo *IfNDefMacro = nullptr;
3456   const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3457   const bool ConditionalTrue = DER.Conditional;
3458   // Lexer might become invalid if we hit code completion point while evaluating
3459   // expression.
3460   if (!CurPPLexer)
3461     return;
3462 
3463   // If this condition is equivalent to #ifndef X, and if this is the first
3464   // directive seen, handle it for the multiple-include optimization.
3465   if (CurPPLexer->getConditionalStackDepth() == 0) {
3466     if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3467       // FIXME: Pass in the location of the macro name, not the 'if' token.
3468       CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3469     else
3470       CurPPLexer->MIOpt.EnterTopLevelConditional();
3471   }
3472 
3473   if (Callbacks)
3474     Callbacks->If(
3475         IfToken.getLocation(), DER.ExprRange,
3476         (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3477 
3478   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3479     getSourceManager().isInMainFile(IfToken.getLocation());
3480 
3481   // Should we include the stuff contained by this directive?
3482   if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3483     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3484     // the directive blocks.
3485     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3486                                      /*foundnonskip*/false, /*foundelse*/false);
3487   } else if (ConditionalTrue || RetainExcludedCB) {
3488     // Yes, remember that we are inside a conditional, then lex the next token.
3489     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3490                                    /*foundnonskip*/true, /*foundelse*/false);
3491   } else {
3492     // No, skip the contents of this block.
3493     SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3494                                  /*Foundnonskip*/ false,
3495                                  /*FoundElse*/ false);
3496   }
3497 }
3498 
3499 /// HandleEndifDirective - Implements the \#endif directive.
3500 ///
3501 void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3502   ++NumEndif;
3503 
3504   // Check that this is the whole directive.
3505   CheckEndOfDirective("endif");
3506 
3507   PPConditionalInfo CondInfo;
3508   if (CurPPLexer->popConditionalLevel(CondInfo)) {
3509     // No conditionals on the stack: this is an #endif without an #if.
3510     Diag(EndifToken, diag::err_pp_endif_without_if);
3511     return;
3512   }
3513 
3514   // If this the end of a top-level #endif, inform MIOpt.
3515   if (CurPPLexer->getConditionalStackDepth() == 0)
3516     CurPPLexer->MIOpt.ExitTopLevelConditional();
3517 
3518   assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3519          "This code should only be reachable in the non-skipping case!");
3520 
3521   if (Callbacks)
3522     Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3523 }
3524 
3525 /// HandleElseDirective - Implements the \#else directive.
3526 ///
3527 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3528   ++NumElse;
3529 
3530   // #else directive in a non-skipping conditional... start skipping.
3531   CheckEndOfDirective("else");
3532 
3533   PPConditionalInfo CI;
3534   if (CurPPLexer->popConditionalLevel(CI)) {
3535     Diag(Result, diag::pp_err_else_without_if);
3536     return;
3537   }
3538 
3539   // If this is a top-level #else, inform the MIOpt.
3540   if (CurPPLexer->getConditionalStackDepth() == 0)
3541     CurPPLexer->MIOpt.EnterTopLevelConditional();
3542 
3543   // If this is a #else with a #else before it, report the error.
3544   if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3545 
3546   if (Callbacks)
3547     Callbacks->Else(Result.getLocation(), CI.IfLoc);
3548 
3549   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3550     getSourceManager().isInMainFile(Result.getLocation());
3551 
3552   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3553     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3554     // the directive blocks.
3555     CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3556                                      /*foundnonskip*/false, /*foundelse*/true);
3557     return;
3558   }
3559 
3560   // Finally, skip the rest of the contents of this block.
3561   SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3562                                /*Foundnonskip*/ true,
3563                                /*FoundElse*/ true, Result.getLocation());
3564 }
3565 
3566 /// Implements the \#elif, \#elifdef, and \#elifndef directives.
3567 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3568                                              const Token &HashToken,
3569                                              tok::PPKeywordKind Kind) {
3570   PPElifDiag DirKind = Kind == tok::pp_elif      ? PED_Elif
3571                        : Kind == tok::pp_elifdef ? PED_Elifdef
3572                                                  : PED_Elifndef;
3573   ++NumElse;
3574 
3575   // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3576   switch (DirKind) {
3577   case PED_Elifdef:
3578   case PED_Elifndef:
3579     unsigned DiagID;
3580     if (LangOpts.CPlusPlus)
3581       DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3582                                     : diag::ext_cxx23_pp_directive;
3583     else
3584       DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3585                             : diag::ext_c23_pp_directive;
3586     Diag(ElifToken, DiagID) << DirKind;
3587     break;
3588   default:
3589     break;
3590   }
3591 
3592   // #elif directive in a non-skipping conditional... start skipping.
3593   // We don't care what the condition is, because we will always skip it (since
3594   // the block immediately before it was included).
3595   SourceRange ConditionRange = DiscardUntilEndOfDirective();
3596 
3597   PPConditionalInfo CI;
3598   if (CurPPLexer->popConditionalLevel(CI)) {
3599     Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3600     return;
3601   }
3602 
3603   // If this is a top-level #elif, inform the MIOpt.
3604   if (CurPPLexer->getConditionalStackDepth() == 0)
3605     CurPPLexer->MIOpt.EnterTopLevelConditional();
3606 
3607   // If this is a #elif with a #else before it, report the error.
3608   if (CI.FoundElse)
3609     Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3610 
3611   if (Callbacks) {
3612     switch (Kind) {
3613     case tok::pp_elif:
3614       Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3615                       PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3616       break;
3617     case tok::pp_elifdef:
3618       Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3619       break;
3620     case tok::pp_elifndef:
3621       Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3622       break;
3623     default:
3624       assert(false && "unexpected directive kind");
3625       break;
3626     }
3627   }
3628 
3629   bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3630     getSourceManager().isInMainFile(ElifToken.getLocation());
3631 
3632   if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3633     // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3634     // the directive blocks.
3635     CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3636                                      /*foundnonskip*/false, /*foundelse*/false);
3637     return;
3638   }
3639 
3640   // Finally, skip the rest of the contents of this block.
3641   SkipExcludedConditionalBlock(
3642       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3643       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3644 }
3645 
3646 std::optional<LexEmbedParametersResult>
3647 Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3648   LexEmbedParametersResult Result{};
3649   SmallVector<Token, 2> ParameterTokens;
3650   tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3651 
3652   auto DiagMismatchedBracesAndSkipToEOD =
3653       [&](tok::TokenKind Expected,
3654           std::pair<tok::TokenKind, SourceLocation> Matches) {
3655         Diag(CurTok, diag::err_expected) << Expected;
3656         Diag(Matches.second, diag::note_matching) << Matches.first;
3657         if (CurTok.isNot(tok::eod))
3658           DiscardUntilEndOfDirective(CurTok);
3659       };
3660 
3661   auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3662     if (CurTok.isNot(Kind)) {
3663       Diag(CurTok, diag::err_expected) << Kind;
3664       if (CurTok.isNot(tok::eod))
3665         DiscardUntilEndOfDirective(CurTok);
3666       return false;
3667     }
3668     return true;
3669   };
3670 
3671   // C23 6.10:
3672   // pp-parameter-name:
3673   //   pp-standard-parameter
3674   //   pp-prefixed-parameter
3675   //
3676   // pp-standard-parameter:
3677   //   identifier
3678   //
3679   // pp-prefixed-parameter:
3680   //   identifier :: identifier
3681   auto LexPPParameterName = [&]() -> std::optional<std::string> {
3682     // We expect the current token to be an identifier; if it's not, things
3683     // have gone wrong.
3684     if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3685       return std::nullopt;
3686 
3687     const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3688 
3689     // Lex another token; it is either a :: or we're done with the parameter
3690     // name.
3691     LexNonComment(CurTok);
3692     if (CurTok.is(tok::coloncolon)) {
3693       // We found a ::, so lex another identifier token.
3694       LexNonComment(CurTok);
3695       if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3696         return std::nullopt;
3697 
3698       const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3699 
3700       // Lex another token so we're past the name.
3701       LexNonComment(CurTok);
3702       return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3703     }
3704     return Prefix->getName().str();
3705   };
3706 
3707   // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3708   // this document as an identifier pp_param and an identifier of the form
3709   // __pp_param__ shall behave the same when used as a preprocessor parameter,
3710   // except for the spelling.
3711   auto NormalizeParameterName = [](StringRef Name) {
3712     if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))
3713       return Name.substr(2, Name.size() - 4);
3714     return Name;
3715   };
3716 
3717   auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3718     // we have a limit parameter and its internals are processed using
3719     // evaluation rules from #if.
3720     if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3721       return std::nullopt;
3722 
3723     // We do not consume the ( because EvaluateDirectiveExpression will lex
3724     // the next token for us.
3725     IdentifierInfo *ParameterIfNDef = nullptr;
3726     bool EvaluatedDefined;
3727     DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3728         ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);
3729 
3730     if (!LimitEvalResult.Value) {
3731       // If there was an error evaluating the directive expression, we expect
3732       // to be at the end of directive token.
3733       assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3734       return std::nullopt;
3735     }
3736 
3737     if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3738       return std::nullopt;
3739 
3740     // Eat the ).
3741     LexNonComment(CurTok);
3742 
3743     // C23 6.10.3.2p2: The token defined shall not appear within the constant
3744     // expression.
3745     if (EvaluatedDefined) {
3746       Diag(CurTok, diag::err_defined_in_pp_embed);
3747       return std::nullopt;
3748     }
3749 
3750     if (LimitEvalResult.Value) {
3751       const llvm::APSInt &Result = *LimitEvalResult.Value;
3752       if (Result.isNegative()) {
3753         Diag(CurTok, diag::err_requires_positive_value)
3754             << toString(Result, 10) << /*positive*/ 0;
3755         return std::nullopt;
3756       }
3757       return Result.getLimitedValue();
3758     }
3759     return std::nullopt;
3760   };
3761 
3762   auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3763     switch (Kind) {
3764     case tok::l_paren:
3765       return tok::r_paren;
3766     case tok::l_brace:
3767       return tok::r_brace;
3768     case tok::l_square:
3769       return tok::r_square;
3770     default:
3771       llvm_unreachable("should not get here");
3772     }
3773   };
3774 
3775   auto LexParenthesizedBalancedTokenSoup =
3776       [&](llvm::SmallVectorImpl<Token> &Tokens) {
3777         std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3778 
3779         // We expect the current token to be a left paren.
3780         if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3781           return false;
3782         LexNonComment(CurTok); // Eat the (
3783 
3784         bool WaitingForInnerCloseParen = false;
3785         while (CurTok.isNot(tok::eod) &&
3786                (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {
3787           switch (CurTok.getKind()) {
3788           default: // Shutting up diagnostics about not fully-covered switch.
3789             break;
3790           case tok::l_paren:
3791             WaitingForInnerCloseParen = true;
3792             [[fallthrough]];
3793           case tok::l_brace:
3794           case tok::l_square:
3795             BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
3796             break;
3797           case tok::r_paren:
3798             WaitingForInnerCloseParen = false;
3799             [[fallthrough]];
3800           case tok::r_brace:
3801           case tok::r_square: {
3802             tok::TokenKind Matching =
3803                 GetMatchingCloseBracket(BracketStack.back().first);
3804             if (BracketStack.empty() || CurTok.getKind() != Matching) {
3805               DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3806               return false;
3807             }
3808             BracketStack.pop_back();
3809           } break;
3810           }
3811           Tokens.push_back(CurTok);
3812           LexNonComment(CurTok);
3813         }
3814 
3815         // When we're done, we want to eat the closing paren.
3816         if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3817           return false;
3818 
3819         LexNonComment(CurTok); // Eat the )
3820         return true;
3821       };
3822 
3823   LexNonComment(CurTok); // Prime the pump.
3824   while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {
3825     SourceLocation ParamStartLoc = CurTok.getLocation();
3826     std::optional<std::string> ParamName = LexPPParameterName();
3827     if (!ParamName)
3828       return std::nullopt;
3829     StringRef Parameter = NormalizeParameterName(*ParamName);
3830 
3831     // Lex the parameters (dependent on the parameter type we want!).
3832     //
3833     // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3834     // one time in the embed parameter sequence.
3835     if (Parameter == "limit") {
3836       if (Result.MaybeLimitParam)
3837         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3838 
3839       std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3840       if (!Limit)
3841         return std::nullopt;
3842       Result.MaybeLimitParam =
3843           PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3844     } else if (Parameter == "clang::offset") {
3845       if (Result.MaybeOffsetParam)
3846         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3847 
3848       std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3849       if (!Offset)
3850         return std::nullopt;
3851       Result.MaybeOffsetParam = PPEmbedParameterOffset{
3852           *Offset, {ParamStartLoc, CurTok.getLocation()}};
3853     } else if (Parameter == "prefix") {
3854       if (Result.MaybePrefixParam)
3855         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3856 
3857       SmallVector<Token, 4> Soup;
3858       if (!LexParenthesizedBalancedTokenSoup(Soup))
3859         return std::nullopt;
3860       Result.MaybePrefixParam = PPEmbedParameterPrefix{
3861           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3862     } else if (Parameter == "suffix") {
3863       if (Result.MaybeSuffixParam)
3864         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3865 
3866       SmallVector<Token, 4> Soup;
3867       if (!LexParenthesizedBalancedTokenSoup(Soup))
3868         return std::nullopt;
3869       Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3870           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3871     } else if (Parameter == "if_empty") {
3872       if (Result.MaybeIfEmptyParam)
3873         Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3874 
3875       SmallVector<Token, 4> Soup;
3876       if (!LexParenthesizedBalancedTokenSoup(Soup))
3877         return std::nullopt;
3878       Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3879           std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3880     } else {
3881       ++Result.UnrecognizedParams;
3882 
3883       // If there's a left paren, we need to parse a balanced token sequence
3884       // and just eat those tokens.
3885       if (CurTok.is(tok::l_paren)) {
3886         SmallVector<Token, 4> Soup;
3887         if (!LexParenthesizedBalancedTokenSoup(Soup))
3888           return std::nullopt;
3889       }
3890       if (!ForHasEmbed) {
3891         Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;
3892         return std::nullopt;
3893       }
3894     }
3895   }
3896   return Result;
3897 }
3898 
3899 void Preprocessor::HandleEmbedDirectiveImpl(
3900     SourceLocation HashLoc, const LexEmbedParametersResult &Params,
3901     StringRef BinaryContents) {
3902   if (BinaryContents.empty()) {
3903     // If we have no binary contents, the only thing we need to emit are the
3904     // if_empty tokens, if any.
3905     // FIXME: this loses AST fidelity; nothing in the compiler will see that
3906     // these tokens came from #embed. We have to hack around this when printing
3907     // preprocessed output. The same is true for prefix and suffix tokens.
3908     if (Params.MaybeIfEmptyParam) {
3909       ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
3910       size_t TokCount = Toks.size();
3911       auto NewToks = std::make_unique<Token[]>(TokCount);
3912       llvm::copy(Toks, NewToks.get());
3913       EnterTokenStream(std::move(NewToks), TokCount, true, true);
3914     }
3915     return;
3916   }
3917 
3918   size_t NumPrefixToks = Params.PrefixTokenCount(),
3919          NumSuffixToks = Params.SuffixTokenCount();
3920   size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
3921   size_t CurIdx = 0;
3922   auto Toks = std::make_unique<Token[]>(TotalNumToks);
3923 
3924   // Add the prefix tokens, if any.
3925   if (Params.MaybePrefixParam) {
3926     llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]);
3927     CurIdx += NumPrefixToks;
3928   }
3929 
3930   EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
3931   Data->BinaryData = BinaryContents;
3932 
3933   Toks[CurIdx].startToken();
3934   Toks[CurIdx].setKind(tok::annot_embed);
3935   Toks[CurIdx].setAnnotationRange(HashLoc);
3936   Toks[CurIdx++].setAnnotationValue(Data);
3937 
3938   // Now add the suffix tokens, if any.
3939   if (Params.MaybeSuffixParam) {
3940     llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]);
3941     CurIdx += NumSuffixToks;
3942   }
3943 
3944   assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
3945   EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
3946 }
3947 
3948 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
3949                                         const FileEntry *LookupFromFile) {
3950   // Give the usual extension/compatibility warnings.
3951   if (LangOpts.C23)
3952     Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
3953   else
3954     Diag(EmbedTok, diag::ext_pp_embed_directive)
3955         << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
3956 
3957   // Parse the filename header
3958   Token FilenameTok;
3959   if (LexHeaderName(FilenameTok))
3960     return;
3961 
3962   if (FilenameTok.isNot(tok::header_name)) {
3963     Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
3964     if (FilenameTok.isNot(tok::eod))
3965       DiscardUntilEndOfDirective();
3966     return;
3967   }
3968 
3969   // Parse the optional sequence of
3970   // directive-parameters:
3971   //     identifier parameter-name-list[opt] directive-argument-list[opt]
3972   // directive-argument-list:
3973   //    '(' balanced-token-sequence ')'
3974   // parameter-name-list:
3975   //    '::' identifier parameter-name-list[opt]
3976   Token CurTok;
3977   std::optional<LexEmbedParametersResult> Params =
3978       LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
3979 
3980   assert((Params || CurTok.is(tok::eod)) &&
3981          "expected success or to be at the end of the directive");
3982   if (!Params)
3983     return;
3984 
3985   // Now, splat the data out!
3986   SmallString<128> FilenameBuffer;
3987   StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
3988   StringRef OriginalFilename = Filename;
3989   bool isAngled =
3990       GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
3991   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
3992   // error.
3993   assert(!Filename.empty());
3994   OptionalFileEntryRef MaybeFileRef =
3995       this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);
3996   if (!MaybeFileRef) {
3997     // could not find file
3998     if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
3999       return;
4000     }
4001     Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
4002     return;
4003   }
4004   std::optional<llvm::MemoryBufferRef> MaybeFile =
4005       getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);
4006   if (!MaybeFile) {
4007     // could not find file
4008     Diag(FilenameTok, diag::err_cannot_open_file)
4009         << Filename << "a buffer to the contents could not be created";
4010     return;
4011   }
4012   StringRef BinaryContents = MaybeFile->getBuffer();
4013 
4014   // The order is important between 'offset' and 'limit'; we want to offset
4015   // first and then limit second; otherwise we may reduce the notional resource
4016   // size to something too small to offset into.
4017   if (Params->MaybeOffsetParam) {
4018     // FIXME: just like with the limit() and if_empty() parameters, this loses
4019     // source fidelity in the AST; it has no idea that there was an offset
4020     // involved.
4021     // offsets all the way to the end of the file make for an empty file.
4022     BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);
4023   }
4024 
4025   if (Params->MaybeLimitParam) {
4026     // FIXME: just like with the clang::offset() and if_empty() parameters,
4027     // this loses source fidelity in the AST; it has no idea there was a limit
4028     // involved.
4029     BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);
4030   }
4031 
4032   if (Callbacks)
4033     Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
4034                               *Params);
4035   HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);
4036 }
4037