1 //===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Implements # directive processing for the Preprocessor. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Basic/AttributeCommonInfo.h" 15 #include "clang/Basic/Attributes.h" 16 #include "clang/Basic/CharInfo.h" 17 #include "clang/Basic/DirectoryEntry.h" 18 #include "clang/Basic/FileManager.h" 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/LangOptions.h" 21 #include "clang/Basic/Module.h" 22 #include "clang/Basic/SourceLocation.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Basic/TargetInfo.h" 25 #include "clang/Basic/TokenKinds.h" 26 #include "clang/Lex/CodeCompletionHandler.h" 27 #include "clang/Lex/HeaderSearch.h" 28 #include "clang/Lex/LexDiagnostic.h" 29 #include "clang/Lex/LiteralSupport.h" 30 #include "clang/Lex/MacroInfo.h" 31 #include "clang/Lex/ModuleLoader.h" 32 #include "clang/Lex/ModuleMap.h" 33 #include "clang/Lex/PPCallbacks.h" 34 #include "clang/Lex/Pragma.h" 35 #include "clang/Lex/Preprocessor.h" 36 #include "clang/Lex/PreprocessorOptions.h" 37 #include "clang/Lex/Token.h" 38 #include "clang/Lex/VariadicMacroSupport.h" 39 #include "llvm/ADT/ArrayRef.h" 40 #include "llvm/ADT/STLExtras.h" 41 #include "llvm/ADT/ScopeExit.h" 42 #include "llvm/ADT/SmallVector.h" 43 #include "llvm/ADT/StringExtras.h" 44 #include "llvm/ADT/StringRef.h" 45 #include "llvm/ADT/StringSwitch.h" 46 #include "llvm/Support/ErrorHandling.h" 47 #include "llvm/Support/Path.h" 48 #include "llvm/Support/SaveAndRestore.h" 49 #include <algorithm> 50 #include <cassert> 51 #include 
<cstring> 52 #include <optional> 53 #include <string> 54 #include <utility> 55 56 using namespace clang; 57 58 //===----------------------------------------------------------------------===// 59 // Utility Methods for Preprocessor Directive Handling. 60 //===----------------------------------------------------------------------===// 61 62 MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) { 63 static_assert(std::is_trivially_destructible_v<MacroInfo>, ""); 64 return new (BP) MacroInfo(L); 65 } 66 67 DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, 68 SourceLocation Loc) { 69 return new (BP) DefMacroDirective(MI, Loc); 70 } 71 72 UndefMacroDirective * 73 Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) { 74 return new (BP) UndefMacroDirective(UndefLoc); 75 } 76 77 VisibilityMacroDirective * 78 Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, 79 bool isPublic) { 80 return new (BP) VisibilityMacroDirective(Loc, isPublic); 81 } 82 83 /// Read and discard all tokens remaining on the current line until 84 /// the tok::eod token is found. 85 SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) { 86 SourceRange Res; 87 88 LexUnexpandedToken(Tmp); 89 Res.setBegin(Tmp.getLocation()); 90 while (Tmp.isNot(tok::eod)) { 91 assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens"); 92 LexUnexpandedToken(Tmp); 93 } 94 Res.setEnd(Tmp.getLocation()); 95 return Res; 96 } 97 98 /// Enumerates possible cases of #define/#undef a reserved identifier. 99 enum MacroDiag { 100 MD_NoWarn, //> Not a reserved identifier 101 MD_KeywordDef, //> Macro hides keyword, enabled by default 102 MD_ReservedMacro, //> #define of #undef reserved id, disabled by default 103 MD_ReservedAttributeIdentifier 104 }; 105 106 /// Enumerates possible %select values for the pp_err_elif_after_else and 107 /// pp_err_elif_without_if diagnostics. 
108 enum PPElifDiag { 109 PED_Elif, 110 PED_Elifdef, 111 PED_Elifndef 112 }; 113 114 static bool isFeatureTestMacro(StringRef MacroName) { 115 // list from: 116 // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html 117 // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160 118 // * man 7 feature_test_macros 119 // The list must be sorted for correct binary search. 120 static constexpr StringRef ReservedMacro[] = { 121 "_ATFILE_SOURCE", 122 "_BSD_SOURCE", 123 "_CRT_NONSTDC_NO_WARNINGS", 124 "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES", 125 "_CRT_SECURE_NO_WARNINGS", 126 "_FILE_OFFSET_BITS", 127 "_FORTIFY_SOURCE", 128 "_GLIBCXX_ASSERTIONS", 129 "_GLIBCXX_CONCEPT_CHECKS", 130 "_GLIBCXX_DEBUG", 131 "_GLIBCXX_DEBUG_PEDANTIC", 132 "_GLIBCXX_PARALLEL", 133 "_GLIBCXX_PARALLEL_ASSERTIONS", 134 "_GLIBCXX_SANITIZE_VECTOR", 135 "_GLIBCXX_USE_CXX11_ABI", 136 "_GLIBCXX_USE_DEPRECATED", 137 "_GNU_SOURCE", 138 "_ISOC11_SOURCE", 139 "_ISOC95_SOURCE", 140 "_ISOC99_SOURCE", 141 "_LARGEFILE64_SOURCE", 142 "_POSIX_C_SOURCE", 143 "_REENTRANT", 144 "_SVID_SOURCE", 145 "_THREAD_SAFE", 146 "_XOPEN_SOURCE", 147 "_XOPEN_SOURCE_EXTENDED", 148 "__STDCPP_WANT_MATH_SPEC_FUNCS__", 149 "__STDC_FORMAT_MACROS", 150 }; 151 return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro), 152 MacroName); 153 } 154 155 static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr, 156 const MacroInfo *MI, 157 const StringRef MacroName) { 158 // If this is a macro with special handling (like __LINE__) then it's language 159 // defined. 
160 if (MI->isBuiltinMacro()) 161 return true; 162 // Builtin macros are defined in the builtin file 163 if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc())) 164 return false; 165 // C defines macros starting with __STDC, and C++ defines macros starting with 166 // __STDCPP 167 if (MacroName.starts_with("__STDC")) 168 return true; 169 // C++ defines the __cplusplus macro 170 if (MacroName == "__cplusplus") 171 return true; 172 // C++ defines various feature-test macros starting with __cpp 173 if (MacroName.starts_with("__cpp")) 174 return true; 175 // Anything else isn't language-defined 176 return false; 177 } 178 179 static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) { 180 const LangOptions &Lang = PP.getLangOpts(); 181 if (Lang.CPlusPlus && 182 hasAttribute(AttributeCommonInfo::AS_CXX11, /* Scope*/ nullptr, II, 183 PP.getTargetInfo(), Lang, /*CheckPlugins*/ false) > 0) { 184 AttributeCommonInfo::AttrArgsInfo AttrArgsInfo = 185 AttributeCommonInfo::getCXX11AttrArgsInfo(II); 186 if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required) 187 return PP.isNextPPTokenLParen(); 188 189 return !PP.isNextPPTokenLParen() || 190 AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional; 191 } 192 return false; 193 } 194 195 static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) { 196 const LangOptions &Lang = PP.getLangOpts(); 197 StringRef Text = II->getName(); 198 if (isReservedInAllContexts(II->isReserved(Lang))) 199 return isFeatureTestMacro(Text) ? 
MD_NoWarn : MD_ReservedMacro; 200 if (II->isKeyword(Lang)) 201 return MD_KeywordDef; 202 if (Lang.CPlusPlus11 && (Text == "override" || Text == "final")) 203 return MD_KeywordDef; 204 if (isReservedCXXAttributeName(PP, II)) 205 return MD_ReservedAttributeIdentifier; 206 return MD_NoWarn; 207 } 208 209 static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) { 210 const LangOptions &Lang = PP.getLangOpts(); 211 // Do not warn on keyword undef. It is generally harmless and widely used. 212 if (isReservedInAllContexts(II->isReserved(Lang))) 213 return MD_ReservedMacro; 214 if (isReservedCXXAttributeName(PP, II)) 215 return MD_ReservedAttributeIdentifier; 216 return MD_NoWarn; 217 } 218 219 // Return true if we want to issue a diagnostic by default if we 220 // encounter this name in a #include with the wrong case. For now, 221 // this includes the standard C and C++ headers, Posix headers, 222 // and Boost headers. Improper case for these #includes is a 223 // potential portability issue. 224 static bool warnByDefaultOnWrongCase(StringRef Include) { 225 // If the first component of the path is "boost", treat this like a standard header 226 // for the purposes of diagnostics. 227 if (::llvm::sys::path::begin(Include)->equals_insensitive("boost")) 228 return true; 229 230 // "condition_variable" is the longest standard header name at 18 characters. 231 // If the include file name is longer than that, it can't be a standard header. 232 static const size_t MaxStdHeaderNameLen = 18u; 233 if (Include.size() > MaxStdHeaderNameLen) 234 return false; 235 236 // Lowercase and normalize the search string. 237 SmallString<32> LowerInclude{Include}; 238 for (char &Ch : LowerInclude) { 239 // In the ASCII range? 240 if (static_cast<unsigned char>(Ch) > 0x7f) 241 return false; // Can't be a standard header 242 // ASCII lowercase: 243 if (Ch >= 'A' && Ch <= 'Z') 244 Ch += 'a' - 'A'; 245 // Normalize path separators for comparison purposes. 
246 else if (::llvm::sys::path::is_separator(Ch)) 247 Ch = '/'; 248 } 249 250 // The standard C/C++ and Posix headers 251 return llvm::StringSwitch<bool>(LowerInclude) 252 // C library headers 253 .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true) 254 .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true) 255 .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true) 256 .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true) 257 .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true) 258 .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true) 259 .Cases("wchar.h", "wctype.h", true) 260 261 // C++ headers for C library facilities 262 .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true) 263 .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true) 264 .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true) 265 .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true) 266 .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true) 267 .Case("cwctype", true) 268 269 // C++ library headers 270 .Cases("algorithm", "fstream", "list", "regex", "thread", true) 271 .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true) 272 .Cases("atomic", "future", "map", "set", "type_traits", true) 273 .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true) 274 .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true) 275 .Cases("codecvt", "ios", "new", "stack", "unordered_map", true) 276 .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true) 277 .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true) 278 .Cases("deque", "istream", "queue", "string", "valarray", true) 279 .Cases("exception", "iterator", "random", "strstream", "vector", true) 280 .Cases("forward_list", "limits", "ratio", "system_error", true) 281 282 // POSIX headers (which aren't also C 
headers) 283 .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true) 284 .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true) 285 .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true) 286 .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true) 287 .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true) 288 .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true) 289 .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true) 290 .Cases("sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h", "sys/socket.h", true) 291 .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true) 292 .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true) 293 .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true) 294 .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true) 295 .Default(false); 296 } 297 298 /// Find a similar string in `Candidates`. 299 /// 300 /// \param LHS a string for a similar string in `Candidates` 301 /// 302 /// \param Candidates the candidates to find a similar string. 303 /// 304 /// \returns a similar string if exists. If no similar string exists, 305 /// returns std::nullopt. 306 static std::optional<StringRef> 307 findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) { 308 // We need to check if `Candidates` has the exact case-insensitive string 309 // because the Levenshtein distance match does not care about it. 310 for (StringRef C : Candidates) { 311 if (LHS.equals_insensitive(C)) { 312 return C; 313 } 314 } 315 316 // Keep going with the Levenshtein distance match. 317 // If the LHS size is less than 3, use the LHS size minus 1 and if not, 318 // use the LHS size divided by 3. 319 size_t Length = LHS.size(); 320 size_t MaxDist = Length < 3 ? 
Length - 1 : Length / 3; 321 322 std::optional<std::pair<StringRef, size_t>> SimilarStr; 323 for (StringRef C : Candidates) { 324 size_t CurDist = LHS.edit_distance(C, true); 325 if (CurDist <= MaxDist) { 326 if (!SimilarStr) { 327 // The first similar string found. 328 SimilarStr = {C, CurDist}; 329 } else if (CurDist < SimilarStr->second) { 330 // More similar string found. 331 SimilarStr = {C, CurDist}; 332 } 333 } 334 } 335 336 if (SimilarStr) { 337 return SimilarStr->first; 338 } else { 339 return std::nullopt; 340 } 341 } 342 343 bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 344 bool *ShadowFlag) { 345 // Missing macro name? 346 if (MacroNameTok.is(tok::eod)) 347 return Diag(MacroNameTok, diag::err_pp_missing_macro_name); 348 349 IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); 350 if (!II) 351 return Diag(MacroNameTok, diag::err_pp_macro_not_identifier); 352 353 if (II->isCPlusPlusOperatorKeyword()) { 354 // C++ 2.5p2: Alternative tokens behave the same as its primary token 355 // except for their spellings. 356 Diag(MacroNameTok, getLangOpts().MicrosoftExt 357 ? diag::ext_pp_operator_used_as_macro_name 358 : diag::err_pp_operator_used_as_macro_name) 359 << II << MacroNameTok.getKind(); 360 // Allow #defining |and| and friends for Microsoft compatibility or 361 // recovery when legacy C headers are included in C++. 362 } 363 364 if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) { 365 // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4. 366 return Diag(MacroNameTok, diag::err_defined_macro_name); 367 } 368 369 // If defining/undefining reserved identifier or a keyword, we need to issue 370 // a warning. 
371 SourceLocation MacroNameLoc = MacroNameTok.getLocation(); 372 if (ShadowFlag) 373 *ShadowFlag = false; 374 if (!SourceMgr.isInSystemHeader(MacroNameLoc) && 375 (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) { 376 MacroDiag D = MD_NoWarn; 377 if (isDefineUndef == MU_Define) { 378 D = shouldWarnOnMacroDef(*this, II); 379 } 380 else if (isDefineUndef == MU_Undef) 381 D = shouldWarnOnMacroUndef(*this, II); 382 if (D == MD_KeywordDef) { 383 // We do not want to warn on some patterns widely used in configuration 384 // scripts. This requires analyzing next tokens, so do not issue warnings 385 // now, only inform caller. 386 if (ShadowFlag) 387 *ShadowFlag = true; 388 } 389 if (D == MD_ReservedMacro) 390 Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id); 391 if (D == MD_ReservedAttributeIdentifier) 392 Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_attribute_id) 393 << II->getName(); 394 } 395 396 // Okay, we got a good identifier. 397 return false; 398 } 399 400 /// Lex and validate a macro name, which occurs after a 401 /// \#define or \#undef. 402 /// 403 /// This sets the token kind to eod and discards the rest of the macro line if 404 /// the macro name is invalid. 405 /// 406 /// \param MacroNameTok Token that is expected to be a macro name. 407 /// \param isDefineUndef Context in which macro is used. 408 /// \param ShadowFlag Points to a flag that is set if macro shadows a keyword. 409 void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, 410 bool *ShadowFlag) { 411 // Read the token, don't allow macro expansion on it. 
412 LexUnexpandedToken(MacroNameTok); 413 414 if (MacroNameTok.is(tok::code_completion)) { 415 if (CodeComplete) 416 CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define); 417 setCodeCompletionReached(); 418 LexUnexpandedToken(MacroNameTok); 419 } 420 421 if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag)) 422 return; 423 424 // Invalid macro name, read and discard the rest of the line and set the 425 // token kind to tok::eod if necessary. 426 if (MacroNameTok.isNot(tok::eod)) { 427 MacroNameTok.setKind(tok::eod); 428 DiscardUntilEndOfDirective(); 429 } 430 } 431 432 /// Ensure that the next token is a tok::eod token. 433 /// 434 /// If not, emit a diagnostic and consume up until the eod. If EnableMacros is 435 /// true, then we consider macros that expand to zero tokens as being ok. 436 /// 437 /// Returns the location of the end of the directive. 438 SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, 439 bool EnableMacros) { 440 Token Tmp; 441 // Lex unexpanded tokens for most directives: macros might expand to zero 442 // tokens, causing us to miss diagnosing invalid lines. Some directives (like 443 // #line) allow empty macros. 444 if (EnableMacros) 445 Lex(Tmp); 446 else 447 LexUnexpandedToken(Tmp); 448 449 // There should be no tokens after the directive, but we allow them as an 450 // extension. 451 while (Tmp.is(tok::comment)) // Skip comments in -C mode. 452 LexUnexpandedToken(Tmp); 453 454 if (Tmp.is(tok::eod)) 455 return Tmp.getLocation(); 456 457 // Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89, 458 // or if this is a macro-style preprocessing directive, because it is more 459 // trouble than it is worth to insert /**/ and check that there is no /**/ 460 // in the range also. 
461 FixItHint Hint; 462 if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && 463 !CurTokenLexer) 464 Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); 465 Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; 466 return DiscardUntilEndOfDirective().getEnd(); 467 } 468 469 void Preprocessor::SuggestTypoedDirective(const Token &Tok, 470 StringRef Directive) const { 471 // If this is a `.S` file, treat unknown # directives as non-preprocessor 472 // directives. 473 if (getLangOpts().AsmPreprocessor) return; 474 475 std::vector<StringRef> Candidates = { 476 "if", "ifdef", "ifndef", "elif", "else", "endif" 477 }; 478 if (LangOpts.C23 || LangOpts.CPlusPlus23) 479 Candidates.insert(Candidates.end(), {"elifdef", "elifndef"}); 480 481 if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) { 482 // Directive cannot be coming from macro. 483 assert(Tok.getLocation().isFileID()); 484 CharSourceRange DirectiveRange = CharSourceRange::getCharRange( 485 Tok.getLocation(), 486 Tok.getLocation().getLocWithOffset(Directive.size())); 487 StringRef SuggValue = *Sugg; 488 489 auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue); 490 Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint; 491 } 492 } 493 494 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and 495 /// decided that the subsequent tokens are in the \#if'd out portion of the 496 /// file. Lex the rest of the file, until we see an \#endif. If 497 /// FoundNonSkipPortion is true, then we have already emitted code for part of 498 /// this \#if directive, so \#else/\#elif blocks should never be entered. 499 /// If ElseOk is true, then \#else directives are ok, if not, then we have 500 /// already seen one so a \#else directive is a duplicate. When this returns, 501 /// the caller can lex the first valid token. 
void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                                SourceLocation IfTokenLoc,
                                                bool FoundNonSkipPortion,
                                                bool FoundElse,
                                                SourceLocation ElseLoc) {
  // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
  // not getting called recursively by storing the RecordedSkippedRanges
  // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
  // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
  // invalidated. If this changes and there is a need to call
  // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
  // change to do a second lookup in endLexPass function instead of reusing the
  // lookup pointer.
  assert(!SkippingExcludedConditionalBlock &&
         "calling SkipExcludedConditionalBlock recursively");
  llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);

  ++NumSkipped;
  assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
  assert(CurPPLexer && "Conditional PP block must be in a file!");
  assert(CurLexer && "Conditional PP block but no current lexer set!");

  // Either resume a skip that previously ran into EOF (per
  // PreambleConditionalStack), or push a fresh conditional level for the
  // directive that started this skip.
  if (PreambleConditionalStack.reachedEOFWhileSkipping())
    PreambleConditionalStack.clearSkipInfo();
  else
    CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
                                     FoundNonSkipPortion, FoundElse);

  // Enter raw mode to disable identifier lookup (and thus macro expansion),
  // disabling warnings, etc.
  CurPPLexer->LexingRawMode = true;
  Token Tok;
  SourceLocation endLoc;

  /// Keeps track and caches skipped ranges and also retrieves a prior skipped
  /// range if the same block is re-visited.
  struct SkippingRangeStateTy {
    Preprocessor &PP;

    // Buffer position where the current skipping pass started; null when no
    // pass is in progress.
    const char *BeginPtr = nullptr;
    // Slot in PP.RecordedSkippedRanges for BeginPtr; holds the recorded
    // distance from BeginPtr to the directive that ended the skip (0 if none
    // has been recorded yet).
    unsigned *SkipRangePtr = nullptr;

    SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}

    void beginLexPass() {
      if (BeginPtr)
        return; // continue skipping a block.

      // Initiate a skipping block and adjust the lexer if we already skipped it
      // before.
      BeginPtr = PP.CurLexer->getBufferLocation();
      SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
      if (*SkipRangePtr) {
        PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
                          /*IsAtStartOfLine*/ true);
      }
    }

    void endLexPass(const char *Hashptr) {
      if (!BeginPtr) {
        // Not doing normal lexing.
        assert(PP.CurLexer->isDependencyDirectivesLexer());
        return;
      }

      // Finished skipping a block, record the range if it's first time visited.
      if (!*SkipRangePtr) {
        *SkipRangePtr = Hashptr - BeginPtr;
      }
      assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
      BeginPtr = nullptr;
      SkipRangePtr = nullptr;
    }
  } SkippingRangeState(*this);

  // Main loop: each iteration finds the next '#' at the start of a line (or
  // EOF) and then handles the directive that follows it.
  while (true) {
    if (CurLexer->isDependencyDirectivesLexer()) {
      CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
    } else {
      SkippingRangeState.beginLexPass();
      while (true) {
        CurLexer->Lex(Tok);

        if (Tok.is(tok::code_completion)) {
          setCodeCompletionReached();
          if (CodeComplete)
            CodeComplete->CodeCompleteInConditionalExclusion();
          continue;
        }

        // If this is the end of the buffer, we have an error.
        if (Tok.is(tok::eof)) {
          // We don't emit errors for unterminated conditionals here,
          // Lexer::LexEndOfFile can do that properly.
          // Just return and let the caller lex after this #include.
          if (PreambleConditionalStack.isRecording())
            PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
                                                      FoundNonSkipPortion,
                                                      FoundElse, ElseLoc);
          break;
        }

        // If this token is not a preprocessor directive, just skip it.
        if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
          continue;

        break;
      }
    }
    if (Tok.is(tok::eof))
      break;

    // We just parsed a # character at the start of a line, so we're in
    // directive mode.  Tell the lexer this so any newlines we see will be
    // converted into an EOD token (this terminates the macro).
    CurPPLexer->ParsingPreprocessorDirective = true;
    if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);

    assert(Tok.is(tok::hash));
    // Buffer position of the '#' itself; used to record the skipped range.
    const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
    assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());

    // Read the next token, the directive flavor.
    LexUnexpandedToken(Tok);

    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
    // something bogus), skip it.
    if (Tok.isNot(tok::raw_identifier)) {
      CurPPLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      if (CurLexer) CurLexer->resetExtendedTokenMode();
      continue;
    }

    // If the first letter isn't i or e, it isn't interesting to us.  We know
    // that this is safe in the face of spelling differences, because there is
    // no way to spell an i/e in a strange way that is another letter.  Skipping
    // this allows us to avoid looking up the identifier info for #define/#undef
    // and other common directives.
    StringRef RI = Tok.getRawIdentifier();

    char FirstChar = RI[0];
    if (FirstChar >= 'a' && FirstChar <= 'z' &&
        FirstChar != 'i' && FirstChar != 'e') {
      CurPPLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      if (CurLexer) CurLexer->resetExtendedTokenMode();
      continue;
    }

    // Get the identifier name without trigraphs or embedded newlines.  Note
    // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
    // when skipping.
    char DirectiveBuf[20];
    StringRef Directive;
    if (!Tok.needsCleaning() && RI.size() < 20) {
      Directive = RI;
    } else {
      std::string DirectiveStr = getSpelling(Tok);
      size_t IdLen = DirectiveStr.size();
      // Spellings of 20 or more characters are not examined further.
      if (IdLen >= 20) {
        CurPPLexer->ParsingPreprocessorDirective = false;
        // Restore comment saving mode.
        if (CurLexer) CurLexer->resetExtendedTokenMode();
        continue;
      }
      // Copy into the function-scope buffer so Directive does not refer to
      // DirectiveStr, which goes out of scope at the end of this block.
      memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
      Directive = StringRef(DirectiveBuf, IdLen);
    }

    if (Directive.starts_with("if")) {
      StringRef Sub = Directive.substr(2);
      if (Sub.empty() ||   // "if"
          Sub == "def" ||   // "ifdef"
          Sub == "ndef") {  // "ifndef"
        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
        // bother parsing the condition.
        DiscardUntilEndOfDirective();
        CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
                                       /*foundnonskip*/false,
                                       /*foundelse*/false);
      } else {
        SuggestTypoedDirective(Tok, Directive);
      }
    } else if (Directive[0] == 'e') {
      StringRef Sub = Directive.substr(1);
      if (Sub == "ndif") {  // "endif"
        PPConditionalInfo CondInfo;
        CondInfo.WasSkipping = true; // Silence bogus warning.
        bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
        (void)InCond;  // Silence warning in no-asserts mode.
        assert(!InCond && "Can't be skipping if not in a conditional!");

        // If we popped the outermost skipping block, we're done skipping!
        if (!CondInfo.WasSkipping) {
          SkippingRangeState.endLexPass(Hashptr);
          // Restore the value of LexingRawMode so that trailing comments
          // are handled correctly, if we've reached the outermost block.
          CurPPLexer->LexingRawMode = false;
          endLoc = CheckEndOfDirective("endif");
          CurPPLexer->LexingRawMode = true;
          if (Callbacks)
            Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
          break;
        } else {
          DiscardUntilEndOfDirective();
        }
      } else if (Sub == "lse") { // "else".
        // #else directive in a skipping conditional.  If not in some other
        // skipping conditional, and if #else hasn't already been seen, enter it
        // as a non-skipping conditional.
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // If this is a #else with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_else_after_else);

        // Note that we've seen a #else in this conditional.
        CondInfo.FoundElse = true;

        // If the conditional is at the top level, and the #if block wasn't
        // entered, enter the #else block now.
        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
          CondInfo.FoundNonSkip = true;
          // Restore the value of LexingRawMode so that trailing comments
          // are handled correctly.
          CurPPLexer->LexingRawMode = false;
          endLoc = CheckEndOfDirective("else");
          CurPPLexer->LexingRawMode = true;
          if (Callbacks)
            Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
          break;
        } else {
          DiscardUntilEndOfDirective();  // C99 6.10p4.
        }
      } else if (Sub == "lif") {  // "elif".
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // If this is a #elif with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;

        // If this is in a skipping block or if we've already handled this #if
        // block, don't bother parsing the condition.
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
          // FIXME: We should probably do at least some minimal parsing of the
          // condition to verify that it is well-formed. The current state
          // allows #elif* directives with completely malformed (or missing)
          // conditions.
          DiscardUntilEndOfDirective();
        } else {
          // Restore the value of LexingRawMode so that identifiers are
          // looked up, etc, inside the #elif expression.
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
          CurPPLexer->LexingRawMode = false;
          IdentifierInfo *IfNDefMacro = nullptr;
          DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
          // Stop if Lexer became invalid after hitting code completion token.
          if (!CurPPLexer)
            return;
          const bool CondValue = DER.Conditional;
          CurPPLexer->LexingRawMode = true;
          if (Callbacks) {
            Callbacks->Elif(
                Tok.getLocation(), DER.ExprRange,
                (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
                CondInfo.IfLoc);
          }
          // If this condition is true, enter it!
          if (CondValue) {
            CondInfo.FoundNonSkip = true;
            break;
          }
        }
      } else if (Sub == "lifdef" ||  // "elifdef"
                 Sub == "lifndef") { // "elifndef"
        bool IsElifDef = Sub == "lifdef";
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
        // Keep the directive token: Tok is not updated by the macro-name
        // lexing below, but the callbacks are given this copy's location.
        Token DirectiveToken = Tok;

        if (!CondInfo.WasSkipping)
          SkippingRangeState.endLexPass(Hashptr);

        // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
        // if this branch is in a skipping block.
        unsigned DiagID;
        if (LangOpts.CPlusPlus)
          DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
                                        : diag::ext_cxx23_pp_directive;
        else
          DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
                                : diag::ext_c23_pp_directive;
        Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);

        // If this is a #elif with a #else before it, report the error.
        if (CondInfo.FoundElse)
          Diag(Tok, diag::pp_err_elif_after_else)
              << (IsElifDef ? PED_Elifdef : PED_Elifndef);

        // If this is in a skipping block or if we've already handled this #if
        // block, don't bother parsing the condition.
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
          // FIXME: We should probably do at least some minimal parsing of the
          // condition to verify that it is well-formed. The current state
          // allows #elif* directives with completely malformed (or missing)
          // conditions.
          DiscardUntilEndOfDirective();
        } else {
          // Restore the value of LexingRawMode so that identifiers are
          // looked up, etc, inside the #elif[n]def expression.
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
          CurPPLexer->LexingRawMode = false;
          Token MacroNameTok;
          ReadMacroName(MacroNameTok);
          CurPPLexer->LexingRawMode = true;

          // If the macro name token is tok::eod, there was an error that was
          // already reported.
          if (MacroNameTok.is(tok::eod)) {
            // Skip code until we get to #endif.  This helps with recovery by
            // not emitting an error when the #endif is reached.
            continue;
          }

          emitMacroExpansionWarnings(MacroNameTok);

          CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");

          IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
          auto MD = getMacroDefinition(MII);
          MacroInfo *MI = MD.getMacroInfo();

          if (Callbacks) {
            if (IsElifDef) {
              Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
                                 MD);
            } else {
              Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
                                  MD);
            }
          }
          // If this condition is true, enter it!
          // (#elifdef is true when the macro is defined, #elifndef when not.)
          if (static_cast<bool>(MI) == IsElifDef) {
            CondInfo.FoundNonSkip = true;
            break;
          }
        }
      } else {
        SuggestTypoedDirective(Tok, Directive);
      }
    } else {
      SuggestTypoedDirective(Tok, Directive);
    }

    CurPPLexer->ParsingPreprocessorDirective = false;
    // Restore comment saving mode.
    if (CurLexer) CurLexer->resetExtendedTokenMode();
  }

  // Finally, if we are out of the conditional (saw an #endif or ran off the
  // end of the file), just stop skipping and return to lexing whatever came
  // after the #if block.
  CurPPLexer->LexingRawMode = false;

  // The last skipped range isn't actually skipped yet if it's truncated
  // by the end of the preamble; we'll resume parsing after the preamble.
  if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
    Callbacks->SourceRangeSkipped(
        SourceRange(HashTokenLoc, endLoc.isValid()
                                      ? endLoc
                                      : CurPPLexer->getSourceLocation()),
        Tok.getLocation());
}

/// Determine which module the code at \p Loc belongs to.
///
/// For a location outside the main file this asks the module map for the
/// module owning the containing file (\p AllowTextual is forwarded to that
/// lookup); otherwise it falls back to the module named by
/// LangOptions::CurrentModule, or nullptr when that name is empty.
Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
                                           bool AllowTextual) {
  if (!SourceMgr.isInMainFile(Loc)) {
    // Try to determine the module of the include directive.
    // FIXME: Look into directly passing the FileEntry from LookupFile instead.
    FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
    if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {
      // The include comes from an included file.
      return HeaderInfo.getModuleMap()
          .findModuleForHeader(*EntryOfIncl, AllowTextual)
          .getModule();
    }
  }

  // This is either in the main file or not in a file at all. It belongs
  // to the current module, if there is one.
  return getLangOpts().CurrentModule.empty()
             ? nullptr
             : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
}

OptionalFileEntryRef
Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                               SourceLocation Loc) {
  Module *IncM = getModuleForLocation(
      IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);

  // Walk up through the include stack, looking through textual headers of M
  // until we hit a non-textual header that we can #include. (We assume textual
  // headers of a module with non-textual headers aren't meant to be used to
  // import entities from the module.)
  auto &SM = getSourceManager();
  while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
    auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
    auto FE = SM.getFileEntryRefForID(ID);
    if (!FE)
      break;

    // We want to find all possible modules that might contain this header, so
    // search all enclosing directories for module maps and load them.
    HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,
                            SourceMgr.isInSystemHeader(Loc));

    bool InPrivateHeader = false;
    for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {
      if (!Header.isAccessibleFrom(IncM)) {
        // It's in a private header; we can't #include it.
        // FIXME: If there's a public header in some module that re-exports it,
        // then we could suggest including that, but it's not clear that's the
        // expected way to make this entity visible.
        InPrivateHeader = true;
        continue;
      }

      // Don't suggest explicitly excluded headers.
      if (Header.getRole() == ModuleMap::ExcludedHeader)
        continue;

      // We'll suggest including textual headers below if they're
      // include-guarded.
      if (Header.getRole() & ModuleMap::TextualHeader)
        continue;

      // If we have a module import syntax, we shouldn't include a header to
      // make a particular module visible. Let the caller know they should
      // suggest an import instead.
948 if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules) 949 return std::nullopt; 950 951 // If this is an accessible, non-textual header of M's top-level module 952 // that transitively includes the given location and makes the 953 // corresponding module visible, this is the thing to #include. 954 return *FE; 955 } 956 957 // FIXME: If we're bailing out due to a private header, we shouldn't suggest 958 // an import either. 959 if (InPrivateHeader) 960 return std::nullopt; 961 962 // If the header is includable and has an include guard, assume the 963 // intended way to expose its contents is by #include, not by importing a 964 // module that transitively includes it. 965 if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE)) 966 return *FE; 967 968 Loc = SM.getIncludeLoc(ID); 969 } 970 971 return std::nullopt; 972 } 973 974 OptionalFileEntryRef Preprocessor::LookupFile( 975 SourceLocation FilenameLoc, StringRef Filename, bool isAngled, 976 ConstSearchDirIterator FromDir, const FileEntry *FromFile, 977 ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath, 978 SmallVectorImpl<char> *RelativePath, 979 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, 980 bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) { 981 ConstSearchDirIterator CurDirLocal = nullptr; 982 ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : CurDirLocal; 983 984 Module *RequestingModule = getModuleForLocation( 985 FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes); 986 987 // If the header lookup mechanism may be relative to the current inclusion 988 // stack, record the parent #includes. 
989 SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers; 990 bool BuildSystemModule = false; 991 if (!FromDir && !FromFile) { 992 FileID FID = getCurrentFileLexer()->getFileID(); 993 OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID); 994 995 // If there is no file entry associated with this file, it must be the 996 // predefines buffer or the module includes buffer. Any other file is not 997 // lexed with a normal lexer, so it won't be scanned for preprocessor 998 // directives. 999 // 1000 // If we have the predefines buffer, resolve #include references (which come 1001 // from the -include command line argument) from the current working 1002 // directory instead of relative to the main file. 1003 // 1004 // If we have the module includes buffer, resolve #include references (which 1005 // come from header declarations in the module map) relative to the module 1006 // map file. 1007 if (!FileEnt) { 1008 if (FID == SourceMgr.getMainFileID() && MainFileDir) { 1009 auto IncludeDir = 1010 HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir( 1011 Filename, getCurrentModule()) 1012 ? HeaderInfo.getModuleMap().getBuiltinDir() 1013 : MainFileDir; 1014 Includers.push_back(std::make_pair(std::nullopt, *IncludeDir)); 1015 BuildSystemModule = getCurrentModule()->IsSystem; 1016 } else if ((FileEnt = SourceMgr.getFileEntryRefForID( 1017 SourceMgr.getMainFileID()))) { 1018 auto CWD = FileMgr.getOptionalDirectoryRef("."); 1019 Includers.push_back(std::make_pair(*FileEnt, *CWD)); 1020 } 1021 } else { 1022 Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir())); 1023 } 1024 1025 // MSVC searches the current include stack from top to bottom for 1026 // headers included by quoted include directives. 
1027 // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx 1028 if (LangOpts.MSVCCompat && !isAngled) { 1029 for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) { 1030 if (IsFileLexer(ISEntry)) 1031 if ((FileEnt = ISEntry.ThePPLexer->getFileEntry())) 1032 Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir())); 1033 } 1034 } 1035 } 1036 1037 CurDir = CurDirLookup; 1038 1039 if (FromFile) { 1040 // We're supposed to start looking from after a particular file. Search 1041 // the include path until we find that file or run out of files. 1042 ConstSearchDirIterator TmpCurDir = CurDir; 1043 ConstSearchDirIterator TmpFromDir = nullptr; 1044 while (OptionalFileEntryRef FE = HeaderInfo.LookupFile( 1045 Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir, 1046 Includers, SearchPath, RelativePath, RequestingModule, 1047 SuggestedModule, /*IsMapped=*/nullptr, 1048 /*IsFrameworkFound=*/nullptr, SkipCache)) { 1049 // Keep looking as if this file did a #include_next. 1050 TmpFromDir = TmpCurDir; 1051 ++TmpFromDir; 1052 if (&FE->getFileEntry() == FromFile) { 1053 // Found it. 1054 FromDir = TmpFromDir; 1055 CurDir = TmpCurDir; 1056 break; 1057 } 1058 } 1059 } 1060 1061 // Do a standard file entry lookup. 1062 OptionalFileEntryRef FE = HeaderInfo.LookupFile( 1063 Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath, 1064 RelativePath, RequestingModule, SuggestedModule, IsMapped, 1065 IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures); 1066 if (FE) 1067 return FE; 1068 1069 OptionalFileEntryRef CurFileEnt; 1070 // Otherwise, see if this is a subframework header. If so, this is relative 1071 // to one of the headers on the #include stack. Walk the list of the current 1072 // headers on the #include stack and pass them to HeaderInfo. 
1073 if (IsFileLexer()) { 1074 if ((CurFileEnt = CurPPLexer->getFileEntry())) { 1075 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader( 1076 Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule, 1077 SuggestedModule)) { 1078 return FE; 1079 } 1080 } 1081 } 1082 1083 for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) { 1084 if (IsFileLexer(ISEntry)) { 1085 if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) { 1086 if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader( 1087 Filename, *CurFileEnt, SearchPath, RelativePath, 1088 RequestingModule, SuggestedModule)) { 1089 return FE; 1090 } 1091 } 1092 } 1093 } 1094 1095 // Otherwise, we really couldn't find the file. 1096 return std::nullopt; 1097 } 1098 1099 OptionalFileEntryRef 1100 Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile, 1101 const FileEntry *LookupFromFile) { 1102 FileManager &FM = this->getFileManager(); 1103 if (llvm::sys::path::is_absolute(Filename)) { 1104 // lookup path or immediately fail 1105 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef( 1106 Filename, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); 1107 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1108 } 1109 1110 auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath, 1111 StringRef StartingFrom, StringRef FileName, 1112 bool RemoveInitialFileComponentFromLookupPath) { 1113 llvm::sys::path::native(StartingFrom, LookupPath); 1114 if (RemoveInitialFileComponentFromLookupPath) 1115 llvm::sys::path::remove_filename(LookupPath); 1116 if (!LookupPath.empty() && 1117 !llvm::sys::path::is_separator(LookupPath.back())) { 1118 LookupPath.push_back(llvm::sys::path::get_separator().front()); 1119 } 1120 LookupPath.append(FileName.begin(), FileName.end()); 1121 }; 1122 1123 // Otherwise, it's search time! 
1124 SmallString<512> LookupPath; 1125 // Non-angled lookup 1126 if (!isAngled) { 1127 if (LookupFromFile) { 1128 // Use file-based lookup. 1129 StringRef FullFileDir = LookupFromFile->tryGetRealPathName(); 1130 if (!FullFileDir.empty()) { 1131 SeparateComponents(LookupPath, FullFileDir, Filename, true); 1132 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef( 1133 LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); 1134 if (ShouldBeEntry) 1135 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1136 llvm::consumeError(ShouldBeEntry.takeError()); 1137 } 1138 } 1139 1140 // Otherwise, do working directory lookup. 1141 LookupPath.clear(); 1142 auto MaybeWorkingDirEntry = FM.getDirectoryRef("."); 1143 if (MaybeWorkingDirEntry) { 1144 DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry; 1145 StringRef WorkingDir = WorkingDirEntry.getName(); 1146 if (!WorkingDir.empty()) { 1147 SeparateComponents(LookupPath, WorkingDir, Filename, false); 1148 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef( 1149 LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); 1150 if (ShouldBeEntry) 1151 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1152 llvm::consumeError(ShouldBeEntry.takeError()); 1153 } 1154 } 1155 } 1156 1157 for (const auto &Entry : PPOpts->EmbedEntries) { 1158 LookupPath.clear(); 1159 SeparateComponents(LookupPath, Entry, Filename, false); 1160 llvm::Expected<FileEntryRef> ShouldBeEntry = FM.getFileRef( 1161 LookupPath, OpenFile, /*CacheFailure=*/true, /*IsText=*/false); 1162 if (ShouldBeEntry) 1163 return llvm::expectedToOptional(std::move(ShouldBeEntry)); 1164 llvm::consumeError(ShouldBeEntry.takeError()); 1165 } 1166 return std::nullopt; 1167 } 1168 1169 //===----------------------------------------------------------------------===// 1170 // Preprocessor Directive Handling. 
1171 //===----------------------------------------------------------------------===// 1172 1173 class Preprocessor::ResetMacroExpansionHelper { 1174 public: 1175 ResetMacroExpansionHelper(Preprocessor *pp) 1176 : PP(pp), save(pp->DisableMacroExpansion) { 1177 if (pp->MacroExpansionInDirectivesOverride) 1178 pp->DisableMacroExpansion = false; 1179 } 1180 1181 ~ResetMacroExpansionHelper() { 1182 PP->DisableMacroExpansion = save; 1183 } 1184 1185 private: 1186 Preprocessor *PP; 1187 bool save; 1188 }; 1189 1190 /// Process a directive while looking for the through header or a #pragma 1191 /// hdrstop. The following directives are handled: 1192 /// #include (to check if it is the through header) 1193 /// #define (to warn about macros that don't match the PCH) 1194 /// #pragma (to check for pragma hdrstop). 1195 /// All other directives are completely discarded. 1196 void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result, 1197 SourceLocation HashLoc) { 1198 if (const IdentifierInfo *II = Result.getIdentifierInfo()) { 1199 if (II->getPPKeywordID() == tok::pp_define) { 1200 return HandleDefineDirective(Result, 1201 /*ImmediatelyAfterHeaderGuard=*/false); 1202 } 1203 if (SkippingUntilPCHThroughHeader && 1204 II->getPPKeywordID() == tok::pp_include) { 1205 return HandleIncludeDirective(HashLoc, Result); 1206 } 1207 if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) { 1208 Lex(Result); 1209 auto *II = Result.getIdentifierInfo(); 1210 if (II && II->getName() == "hdrstop") 1211 return HandlePragmaHdrstop(Result); 1212 } 1213 } 1214 DiscardUntilEndOfDirective(); 1215 } 1216 1217 /// HandleDirective - This callback is invoked when the lexer sees a # token 1218 /// at the start of a line. This consumes the directive, modifies the 1219 /// lexer/preprocessor state, and advances the lexer(s) so that the next token 1220 /// read is the correct one. 
1221 void Preprocessor::HandleDirective(Token &Result) { 1222 // FIXME: Traditional: # with whitespace before it not recognized by K&R? 1223 1224 // We just parsed a # character at the start of a line, so we're in directive 1225 // mode. Tell the lexer this so any newlines we see will be converted into an 1226 // EOD token (which terminates the directive). 1227 CurPPLexer->ParsingPreprocessorDirective = true; 1228 if (CurLexer) CurLexer->SetKeepWhitespaceMode(false); 1229 1230 bool ImmediatelyAfterTopLevelIfndef = 1231 CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef(); 1232 CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef(); 1233 1234 ++NumDirectives; 1235 1236 // We are about to read a token. For the multiple-include optimization FA to 1237 // work, we have to remember if we had read any tokens *before* this 1238 // pp-directive. 1239 bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); 1240 1241 // Save the '#' token in case we need to return it later. 1242 Token SavedHash = Result; 1243 1244 // Read the next token, the directive flavor. This isn't expanded due to 1245 // C99 6.10.3p8. 1246 LexUnexpandedToken(Result); 1247 1248 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.: 1249 // #define A(x) #x 1250 // A(abc 1251 // #warning blah 1252 // def) 1253 // If so, the user is relying on undefined behavior, emit a diagnostic. Do 1254 // not support this for #include-like directives, since that can result in 1255 // terrible diagnostics, and does not work in GCC. 
1256 if (InMacroArgs) { 1257 if (IdentifierInfo *II = Result.getIdentifierInfo()) { 1258 switch (II->getPPKeywordID()) { 1259 case tok::pp_include: 1260 case tok::pp_import: 1261 case tok::pp_include_next: 1262 case tok::pp___include_macros: 1263 case tok::pp_pragma: 1264 case tok::pp_embed: 1265 Diag(Result, diag::err_embedded_directive) << II->getName(); 1266 Diag(*ArgMacro, diag::note_macro_expansion_here) 1267 << ArgMacro->getIdentifierInfo(); 1268 DiscardUntilEndOfDirective(); 1269 return; 1270 default: 1271 break; 1272 } 1273 } 1274 Diag(Result, diag::ext_embedded_directive); 1275 } 1276 1277 // Temporarily enable macro expansion if set so 1278 // and reset to previous state when returning from this function. 1279 ResetMacroExpansionHelper helper(this); 1280 1281 if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop) 1282 return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation()); 1283 1284 switch (Result.getKind()) { 1285 case tok::eod: 1286 // Ignore the null directive with regards to the multiple-include 1287 // optimization, i.e. allow the null directive to appear outside of the 1288 // include guard and still enable the multiple-include optimization. 1289 CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective); 1290 return; // null directive. 1291 case tok::code_completion: 1292 setCodeCompletionReached(); 1293 if (CodeComplete) 1294 CodeComplete->CodeCompleteDirective( 1295 CurPPLexer->getConditionalStackDepth() > 0); 1296 return; 1297 case tok::numeric_constant: // # 7 GNU line marker directive. 1298 // In a .S file "# 4" may be a comment so don't treat it as a preprocessor 1299 // directive. However do permit it in the predefines file, as we use line 1300 // markers to mark the builtin macros as being in a system header. 
1301 if (getLangOpts().AsmPreprocessor && 1302 SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID()) 1303 break; 1304 return HandleDigitDirective(Result); 1305 default: 1306 IdentifierInfo *II = Result.getIdentifierInfo(); 1307 if (!II) break; // Not an identifier. 1308 1309 // Ask what the preprocessor keyword ID is. 1310 switch (II->getPPKeywordID()) { 1311 default: break; 1312 // C99 6.10.1 - Conditional Inclusion. 1313 case tok::pp_if: 1314 return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective); 1315 case tok::pp_ifdef: 1316 return HandleIfdefDirective(Result, SavedHash, false, 1317 true /*not valid for miopt*/); 1318 case tok::pp_ifndef: 1319 return HandleIfdefDirective(Result, SavedHash, true, 1320 ReadAnyTokensBeforeDirective); 1321 case tok::pp_elif: 1322 case tok::pp_elifdef: 1323 case tok::pp_elifndef: 1324 return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID()); 1325 1326 case tok::pp_else: 1327 return HandleElseDirective(Result, SavedHash); 1328 case tok::pp_endif: 1329 return HandleEndifDirective(Result); 1330 1331 // C99 6.10.2 - Source File Inclusion. 1332 case tok::pp_include: 1333 // Handle #include. 1334 return HandleIncludeDirective(SavedHash.getLocation(), Result); 1335 case tok::pp___include_macros: 1336 // Handle -imacros. 1337 return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result); 1338 1339 // C99 6.10.3 - Macro Replacement. 1340 case tok::pp_define: 1341 return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef); 1342 case tok::pp_undef: 1343 return HandleUndefDirective(); 1344 1345 // C99 6.10.4 - Line Control. 1346 case tok::pp_line: 1347 return HandleLineDirective(); 1348 1349 // C99 6.10.5 - Error Directive. 1350 case tok::pp_error: 1351 return HandleUserDiagnosticDirective(Result, false); 1352 1353 // C99 6.10.6 - Pragma Directive. 
1354 case tok::pp_pragma: 1355 return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); 1356 1357 // GNU Extensions. 1358 case tok::pp_import: 1359 return HandleImportDirective(SavedHash.getLocation(), Result); 1360 case tok::pp_include_next: 1361 return HandleIncludeNextDirective(SavedHash.getLocation(), Result); 1362 1363 case tok::pp_warning: 1364 if (LangOpts.CPlusPlus) 1365 Diag(Result, LangOpts.CPlusPlus23 1366 ? diag::warn_cxx23_compat_warning_directive 1367 : diag::ext_pp_warning_directive) 1368 << /*C++23*/ 1; 1369 else 1370 Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive 1371 : diag::ext_pp_warning_directive) 1372 << /*C23*/ 0; 1373 1374 return HandleUserDiagnosticDirective(Result, true); 1375 case tok::pp_ident: 1376 return HandleIdentSCCSDirective(Result); 1377 case tok::pp_sccs: 1378 return HandleIdentSCCSDirective(Result); 1379 case tok::pp_embed: 1380 return HandleEmbedDirective(SavedHash.getLocation(), Result, 1381 getCurrentFileLexer() 1382 ? *getCurrentFileLexer()->getFileEntry() 1383 : static_cast<FileEntry *>(nullptr)); 1384 case tok::pp_assert: 1385 //isExtension = true; // FIXME: implement #assert 1386 break; 1387 case tok::pp_unassert: 1388 //isExtension = true; // FIXME: implement #unassert 1389 break; 1390 1391 case tok::pp___public_macro: 1392 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility) 1393 return HandleMacroPublicDirective(Result); 1394 break; 1395 1396 case tok::pp___private_macro: 1397 if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility) 1398 return HandleMacroPrivateDirective(); 1399 break; 1400 } 1401 break; 1402 } 1403 1404 // If this is a .S file, treat unknown # directives as non-preprocessor 1405 // directives. This is important because # may be a comment or introduce 1406 // various pseudo-ops. Just return the # token and push back the following 1407 // token to be lexed next time. 
1408 if (getLangOpts().AsmPreprocessor) { 1409 auto Toks = std::make_unique<Token[]>(2); 1410 // Return the # and the token after it. 1411 Toks[0] = SavedHash; 1412 Toks[1] = Result; 1413 1414 // If the second token is a hashhash token, then we need to translate it to 1415 // unknown so the token lexer doesn't try to perform token pasting. 1416 if (Result.is(tok::hashhash)) 1417 Toks[1].setKind(tok::unknown); 1418 1419 // Enter this token stream so that we re-lex the tokens. Make sure to 1420 // enable macro expansion, in case the token after the # is an identifier 1421 // that is expanded. 1422 EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false); 1423 return; 1424 } 1425 1426 // If we reached here, the preprocessing token is not valid! 1427 // Start suggesting if a similar directive found. 1428 Diag(Result, diag::err_pp_invalid_directive) << 0; 1429 1430 // Read the rest of the PP line. 1431 DiscardUntilEndOfDirective(); 1432 1433 // Okay, we're done parsing the directive. 1434 } 1435 1436 /// GetLineValue - Convert a numeric token into an unsigned value, emitting 1437 /// Diagnostic DiagID if it is invalid, and returning the value in Val. 1438 static bool GetLineValue(Token &DigitTok, unsigned &Val, 1439 unsigned DiagID, Preprocessor &PP, 1440 bool IsGNULineDirective=false) { 1441 if (DigitTok.isNot(tok::numeric_constant)) { 1442 PP.Diag(DigitTok, DiagID); 1443 1444 if (DigitTok.isNot(tok::eod)) 1445 PP.DiscardUntilEndOfDirective(); 1446 return true; 1447 } 1448 1449 SmallString<64> IntegerBuffer; 1450 IntegerBuffer.resize(DigitTok.getLength()); 1451 const char *DigitTokBegin = &IntegerBuffer[0]; 1452 bool Invalid = false; 1453 unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid); 1454 if (Invalid) 1455 return true; 1456 1457 // Verify that we have a simple digit-sequence, and compute the value. This 1458 // is always a simple digit string computed in decimal, so we do this manually 1459 // here. 
1460 Val = 0; 1461 for (unsigned i = 0; i != ActualLength; ++i) { 1462 // C++1y [lex.fcon]p1: 1463 // Optional separating single quotes in a digit-sequence are ignored 1464 if (DigitTokBegin[i] == '\'') 1465 continue; 1466 1467 if (!isDigit(DigitTokBegin[i])) { 1468 PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i), 1469 diag::err_pp_line_digit_sequence) << IsGNULineDirective; 1470 PP.DiscardUntilEndOfDirective(); 1471 return true; 1472 } 1473 1474 unsigned NextVal = Val*10+(DigitTokBegin[i]-'0'); 1475 if (NextVal < Val) { // overflow. 1476 PP.Diag(DigitTok, DiagID); 1477 PP.DiscardUntilEndOfDirective(); 1478 return true; 1479 } 1480 Val = NextVal; 1481 } 1482 1483 if (DigitTokBegin[0] == '0' && Val) 1484 PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal) 1485 << IsGNULineDirective; 1486 1487 return false; 1488 } 1489 1490 /// Handle a \#line directive: C99 6.10.4. 1491 /// 1492 /// The two acceptable forms are: 1493 /// \verbatim 1494 /// # line digit-sequence 1495 /// # line digit-sequence "s-char-sequence" 1496 /// \endverbatim 1497 void Preprocessor::HandleLineDirective() { 1498 // Read the line # and string argument. Per C99 6.10.4p5, these tokens are 1499 // expanded. 1500 Token DigitTok; 1501 Lex(DigitTok); 1502 1503 // Validate the number and convert it to an unsigned. 1504 unsigned LineNo; 1505 if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this)) 1506 return; 1507 1508 if (LineNo == 0) 1509 Diag(DigitTok, diag::ext_pp_line_zero); 1510 1511 // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a 1512 // number greater than 2147483647". C90 requires that the line # be <= 32767. 
1513 unsigned LineLimit = 32768U; 1514 if (LangOpts.C99 || LangOpts.CPlusPlus11) 1515 LineLimit = 2147483648U; 1516 if (LineNo >= LineLimit) 1517 Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit; 1518 else if (LangOpts.CPlusPlus11 && LineNo >= 32768U) 1519 Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big); 1520 1521 int FilenameID = -1; 1522 Token StrTok; 1523 Lex(StrTok); 1524 1525 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a 1526 // string followed by eod. 1527 if (StrTok.is(tok::eod)) 1528 ; // ok 1529 else if (StrTok.isNot(tok::string_literal)) { 1530 Diag(StrTok, diag::err_pp_line_invalid_filename); 1531 DiscardUntilEndOfDirective(); 1532 return; 1533 } else if (StrTok.hasUDSuffix()) { 1534 Diag(StrTok, diag::err_invalid_string_udl); 1535 DiscardUntilEndOfDirective(); 1536 return; 1537 } else { 1538 // Parse and validate the string, converting it into a unique ID. 1539 StringLiteralParser Literal(StrTok, *this); 1540 assert(Literal.isOrdinary() && "Didn't allow wide strings in"); 1541 if (Literal.hadError) { 1542 DiscardUntilEndOfDirective(); 1543 return; 1544 } 1545 if (Literal.Pascal) { 1546 Diag(StrTok, diag::err_pp_linemarker_invalid_filename); 1547 DiscardUntilEndOfDirective(); 1548 return; 1549 } 1550 FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); 1551 1552 // Verify that there is nothing after the string, other than EOD. Because 1553 // of C99 6.10.4p5, macros that expand to empty tokens are ok. 1554 CheckEndOfDirective("line", true); 1555 } 1556 1557 // Take the file kind of the file containing the #line directive. #line 1558 // directives are often used for generated sources from the same codebase, so 1559 // the new file should generally be classified the same way as the current 1560 // file. This is visible in GCC's pre-processed output, which rewrites #line 1561 // to GNU line markers. 
1562 SrcMgr::CharacteristicKind FileKind = 1563 SourceMgr.getFileCharacteristic(DigitTok.getLocation()); 1564 1565 SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false, 1566 false, FileKind); 1567 1568 if (Callbacks) 1569 Callbacks->FileChanged(CurPPLexer->getSourceLocation(), 1570 PPCallbacks::RenameFile, FileKind); 1571 } 1572 1573 /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line 1574 /// marker directive. 1575 static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, 1576 SrcMgr::CharacteristicKind &FileKind, 1577 Preprocessor &PP) { 1578 unsigned FlagVal; 1579 Token FlagTok; 1580 PP.Lex(FlagTok); 1581 if (FlagTok.is(tok::eod)) return false; 1582 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP)) 1583 return true; 1584 1585 if (FlagVal == 1) { 1586 IsFileEntry = true; 1587 1588 PP.Lex(FlagTok); 1589 if (FlagTok.is(tok::eod)) return false; 1590 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP)) 1591 return true; 1592 } else if (FlagVal == 2) { 1593 IsFileExit = true; 1594 1595 SourceManager &SM = PP.getSourceManager(); 1596 // If we are leaving the current presumed file, check to make sure the 1597 // presumed include stack isn't empty! 1598 FileID CurFileID = 1599 SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first; 1600 PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation()); 1601 if (PLoc.isInvalid()) 1602 return true; 1603 1604 // If there is no include loc (main file) or if the include loc is in a 1605 // different physical file, then we aren't in a "1" line marker flag region. 
1606 SourceLocation IncLoc = PLoc.getIncludeLoc(); 1607 if (IncLoc.isInvalid() || 1608 SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) { 1609 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop); 1610 PP.DiscardUntilEndOfDirective(); 1611 return true; 1612 } 1613 1614 PP.Lex(FlagTok); 1615 if (FlagTok.is(tok::eod)) return false; 1616 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP)) 1617 return true; 1618 } 1619 1620 // We must have 3 if there are still flags. 1621 if (FlagVal != 3) { 1622 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag); 1623 PP.DiscardUntilEndOfDirective(); 1624 return true; 1625 } 1626 1627 FileKind = SrcMgr::C_System; 1628 1629 PP.Lex(FlagTok); 1630 if (FlagTok.is(tok::eod)) return false; 1631 if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP)) 1632 return true; 1633 1634 // We must have 4 if there is yet another flag. 1635 if (FlagVal != 4) { 1636 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag); 1637 PP.DiscardUntilEndOfDirective(); 1638 return true; 1639 } 1640 1641 FileKind = SrcMgr::C_ExternCSystem; 1642 1643 PP.Lex(FlagTok); 1644 if (FlagTok.is(tok::eod)) return false; 1645 1646 // There are no more valid flags here. 1647 PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag); 1648 PP.DiscardUntilEndOfDirective(); 1649 return true; 1650 } 1651 1652 /// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is 1653 /// one of the following forms: 1654 /// 1655 /// # 42 1656 /// # 42 "file" ('1' | '2')? 1657 /// # 42 "file" ('1' | '2')? '3' '4'? 1658 /// 1659 void Preprocessor::HandleDigitDirective(Token &DigitTok) { 1660 // Validate the number and convert it to an unsigned. GNU does not have a 1661 // line # limit other than it fit in 32-bits. 
1662 unsigned LineNo; 1663 if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer, 1664 *this, true)) 1665 return; 1666 1667 Token StrTok; 1668 Lex(StrTok); 1669 1670 bool IsFileEntry = false, IsFileExit = false; 1671 int FilenameID = -1; 1672 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; 1673 1674 // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a 1675 // string followed by eod. 1676 if (StrTok.is(tok::eod)) { 1677 Diag(StrTok, diag::ext_pp_gnu_line_directive); 1678 // Treat this like "#line NN", which doesn't change file characteristics. 1679 FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation()); 1680 } else if (StrTok.isNot(tok::string_literal)) { 1681 Diag(StrTok, diag::err_pp_linemarker_invalid_filename); 1682 DiscardUntilEndOfDirective(); 1683 return; 1684 } else if (StrTok.hasUDSuffix()) { 1685 Diag(StrTok, diag::err_invalid_string_udl); 1686 DiscardUntilEndOfDirective(); 1687 return; 1688 } else { 1689 // Parse and validate the string, converting it into a unique ID. 1690 StringLiteralParser Literal(StrTok, *this); 1691 assert(Literal.isOrdinary() && "Didn't allow wide strings in"); 1692 if (Literal.hadError) { 1693 DiscardUntilEndOfDirective(); 1694 return; 1695 } 1696 if (Literal.Pascal) { 1697 Diag(StrTok, diag::err_pp_linemarker_invalid_filename); 1698 DiscardUntilEndOfDirective(); 1699 return; 1700 } 1701 1702 // If a filename was present, read any flags that are present. 1703 if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this)) 1704 return; 1705 if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) && 1706 !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation())) 1707 Diag(StrTok, diag::ext_pp_gnu_line_directive); 1708 1709 // Exiting to an empty string means pop to the including file, so leave 1710 // FilenameID as -1 in that case. 
1711 if (!(IsFileExit && Literal.GetString().empty())) 1712 FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); 1713 } 1714 1715 // Create a line note with this information. 1716 SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry, 1717 IsFileExit, FileKind); 1718 1719 // If the preprocessor has callbacks installed, notify them of the #line 1720 // change. This is used so that the line marker comes out in -E mode for 1721 // example. 1722 if (Callbacks) { 1723 PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile; 1724 if (IsFileEntry) 1725 Reason = PPCallbacks::EnterFile; 1726 else if (IsFileExit) 1727 Reason = PPCallbacks::ExitFile; 1728 1729 Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind); 1730 } 1731 } 1732 1733 /// HandleUserDiagnosticDirective - Handle a #warning or #error directive. 1734 /// 1735 void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, 1736 bool isWarning) { 1737 // Read the rest of the line raw. We do this because we don't want macros 1738 // to be expanded and we don't require that the tokens be valid preprocessing 1739 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does 1740 // collapse multiple consecutive white space between tokens, but this isn't 1741 // specified by the standard. 1742 SmallString<128> Message; 1743 CurLexer->ReadToEndOfLine(&Message); 1744 1745 // Find the first non-whitespace character, so that we can make the 1746 // diagnostic more succinct. 1747 StringRef Msg = Message.str().ltrim(' '); 1748 1749 if (isWarning) 1750 Diag(Tok, diag::pp_hash_warning) << Msg; 1751 else 1752 Diag(Tok, diag::err_pp_hash_error) << Msg; 1753 } 1754 1755 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive. 1756 /// 1757 void Preprocessor::HandleIdentSCCSDirective(Token &Tok) { 1758 // Yes, this directive is an extension. 1759 Diag(Tok, diag::ext_pp_ident_directive); 1760 1761 // Read the string argument. 
1762 Token StrTok; 1763 Lex(StrTok); 1764 1765 // If the token kind isn't a string, it's a malformed directive. 1766 if (StrTok.isNot(tok::string_literal) && 1767 StrTok.isNot(tok::wide_string_literal)) { 1768 Diag(StrTok, diag::err_pp_malformed_ident); 1769 if (StrTok.isNot(tok::eod)) 1770 DiscardUntilEndOfDirective(); 1771 return; 1772 } 1773 1774 if (StrTok.hasUDSuffix()) { 1775 Diag(StrTok, diag::err_invalid_string_udl); 1776 DiscardUntilEndOfDirective(); 1777 return; 1778 } 1779 1780 // Verify that there is nothing after the string, other than EOD. 1781 CheckEndOfDirective("ident"); 1782 1783 if (Callbacks) { 1784 bool Invalid = false; 1785 std::string Str = getSpelling(StrTok, &Invalid); 1786 if (!Invalid) 1787 Callbacks->Ident(Tok.getLocation(), Str); 1788 } 1789 } 1790 1791 /// Handle a #public directive. 1792 void Preprocessor::HandleMacroPublicDirective(Token &Tok) { 1793 Token MacroNameTok; 1794 ReadMacroName(MacroNameTok, MU_Undef); 1795 1796 // Error reading macro name? If so, diagnostic already issued. 1797 if (MacroNameTok.is(tok::eod)) 1798 return; 1799 1800 // Check to see if this is the last token on the #__public_macro line. 1801 CheckEndOfDirective("__public_macro"); 1802 1803 IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); 1804 // Okay, we finally have a valid identifier to undef. 1805 MacroDirective *MD = getLocalMacroDirective(II); 1806 1807 // If the macro is not defined, this is an error. 1808 if (!MD) { 1809 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II; 1810 return; 1811 } 1812 1813 // Note that this macro has now been exported. 1814 appendMacroDirective(II, AllocateVisibilityMacroDirective( 1815 MacroNameTok.getLocation(), /*isPublic=*/true)); 1816 } 1817 1818 /// Handle a #private directive. 1819 void Preprocessor::HandleMacroPrivateDirective() { 1820 Token MacroNameTok; 1821 ReadMacroName(MacroNameTok, MU_Undef); 1822 1823 // Error reading macro name? If so, diagnostic already issued. 
1824 if (MacroNameTok.is(tok::eod)) 1825 return; 1826 1827 // Check to see if this is the last token on the #__private_macro line. 1828 CheckEndOfDirective("__private_macro"); 1829 1830 IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); 1831 // Okay, we finally have a valid identifier to undef. 1832 MacroDirective *MD = getLocalMacroDirective(II); 1833 1834 // If the macro is not defined, this is an error. 1835 if (!MD) { 1836 Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II; 1837 return; 1838 } 1839 1840 // Note that this macro has now been marked private. 1841 appendMacroDirective(II, AllocateVisibilityMacroDirective( 1842 MacroNameTok.getLocation(), /*isPublic=*/false)); 1843 } 1844 1845 //===----------------------------------------------------------------------===// 1846 // Preprocessor Include Directive Handling. 1847 //===----------------------------------------------------------------------===// 1848 1849 /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully 1850 /// checked and spelled filename, e.g. as an operand of \#include. This returns 1851 /// true if the input filename was in <>'s or false if it were in ""'s. The 1852 /// caller is expected to provide a buffer that is large enough to hold the 1853 /// spelling of the filename, but is also expected to handle the case when 1854 /// this method decides to use a different buffer. 1855 bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, 1856 StringRef &Buffer) { 1857 // Get the text form of the filename. 1858 assert(!Buffer.empty() && "Can't have tokens with empty spellings!"); 1859 1860 // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and 1861 // C++20 [lex.header]/2: 1862 // 1863 // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then 1864 // in C: behavior is undefined 1865 // in C++: program is conditionally-supported with implementation-defined 1866 // semantics 1867 1868 // Make sure the filename is <x> or "x". 
1869 bool isAngled; 1870 if (Buffer[0] == '<') { 1871 if (Buffer.back() != '>') { 1872 Diag(Loc, diag::err_pp_expects_filename); 1873 Buffer = StringRef(); 1874 return true; 1875 } 1876 isAngled = true; 1877 } else if (Buffer[0] == '"') { 1878 if (Buffer.back() != '"') { 1879 Diag(Loc, diag::err_pp_expects_filename); 1880 Buffer = StringRef(); 1881 return true; 1882 } 1883 isAngled = false; 1884 } else { 1885 Diag(Loc, diag::err_pp_expects_filename); 1886 Buffer = StringRef(); 1887 return true; 1888 } 1889 1890 // Diagnose #include "" as invalid. 1891 if (Buffer.size() <= 2) { 1892 Diag(Loc, diag::err_pp_empty_filename); 1893 Buffer = StringRef(); 1894 return true; 1895 } 1896 1897 // Skip the brackets. 1898 Buffer = Buffer.substr(1, Buffer.size()-2); 1899 return isAngled; 1900 } 1901 1902 /// Push a token onto the token stream containing an annotation. 1903 void Preprocessor::EnterAnnotationToken(SourceRange Range, 1904 tok::TokenKind Kind, 1905 void *AnnotationVal) { 1906 // FIXME: Produce this as the current token directly, rather than 1907 // allocating a new token for it. 1908 auto Tok = std::make_unique<Token[]>(1); 1909 Tok[0].startToken(); 1910 Tok[0].setKind(Kind); 1911 Tok[0].setLocation(Range.getBegin()); 1912 Tok[0].setAnnotationEndLoc(Range.getEnd()); 1913 Tok[0].setAnnotationValue(AnnotationVal); 1914 EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false); 1915 } 1916 1917 /// Produce a diagnostic informing the user that a #include or similar 1918 /// was implicitly treated as a module import. 
1919 static void diagnoseAutoModuleImport( 1920 Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok, 1921 ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path, 1922 SourceLocation PathEnd) { 1923 SmallString<128> PathString; 1924 for (size_t I = 0, N = Path.size(); I != N; ++I) { 1925 if (I) 1926 PathString += '.'; 1927 PathString += Path[I].first->getName(); 1928 } 1929 1930 int IncludeKind = 0; 1931 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) { 1932 case tok::pp_include: 1933 IncludeKind = 0; 1934 break; 1935 1936 case tok::pp_import: 1937 IncludeKind = 1; 1938 break; 1939 1940 case tok::pp_include_next: 1941 IncludeKind = 2; 1942 break; 1943 1944 case tok::pp___include_macros: 1945 IncludeKind = 3; 1946 break; 1947 1948 default: 1949 llvm_unreachable("unknown include directive kind"); 1950 } 1951 1952 PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation) 1953 << IncludeKind << PathString; 1954 } 1955 1956 // Given a vector of path components and a string containing the real 1957 // path to the file, build a properly-cased replacement in the vector, 1958 // and return true if the replacement should be suggested. 1959 static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components, 1960 StringRef RealPathName, 1961 llvm::sys::path::Style Separator) { 1962 auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName); 1963 auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName); 1964 int Cnt = 0; 1965 bool SuggestReplacement = false; 1966 1967 auto IsSep = [Separator](StringRef Component) { 1968 return Component.size() == 1 && 1969 llvm::sys::path::is_separator(Component[0], Separator); 1970 }; 1971 1972 // Below is a best-effort to handle ".." in paths. It is admittedly 1973 // not 100% correct in the presence of symlinks. 1974 for (auto &Component : llvm::reverse(Components)) { 1975 if ("." == Component) { 1976 } else if (".." 
== Component) { 1977 ++Cnt; 1978 } else if (Cnt) { 1979 --Cnt; 1980 } else if (RealPathComponentIter != RealPathComponentEnd) { 1981 if (!IsSep(Component) && !IsSep(*RealPathComponentIter) && 1982 Component != *RealPathComponentIter) { 1983 // If these non-separator path components differ by more than just case, 1984 // then we may be looking at symlinked paths. Bail on this diagnostic to 1985 // avoid noisy false positives. 1986 SuggestReplacement = 1987 RealPathComponentIter->equals_insensitive(Component); 1988 if (!SuggestReplacement) 1989 break; 1990 Component = *RealPathComponentIter; 1991 } 1992 ++RealPathComponentIter; 1993 } 1994 } 1995 return SuggestReplacement; 1996 } 1997 1998 bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts, 1999 const TargetInfo &TargetInfo, 2000 const Module &M, 2001 DiagnosticsEngine &Diags) { 2002 Module::Requirement Requirement; 2003 Module::UnresolvedHeaderDirective MissingHeader; 2004 Module *ShadowingModule = nullptr; 2005 if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader, 2006 ShadowingModule)) 2007 return false; 2008 2009 if (MissingHeader.FileNameLoc.isValid()) { 2010 Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing) 2011 << MissingHeader.IsUmbrella << MissingHeader.FileName; 2012 } else if (ShadowingModule) { 2013 Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name; 2014 Diags.Report(ShadowingModule->DefinitionLoc, 2015 diag::note_previous_definition); 2016 } else { 2017 // FIXME: Track the location at which the requirement was specified, and 2018 // use it here. 
2019 Diags.Report(M.DefinitionLoc, diag::err_module_unavailable) 2020 << M.getFullModuleName() << Requirement.RequiredState 2021 << Requirement.FeatureName; 2022 } 2023 return true; 2024 } 2025 2026 std::pair<ConstSearchDirIterator, const FileEntry *> 2027 Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const { 2028 // #include_next is like #include, except that we start searching after 2029 // the current found directory. If we can't do this, issue a 2030 // diagnostic. 2031 ConstSearchDirIterator Lookup = CurDirLookup; 2032 const FileEntry *LookupFromFile = nullptr; 2033 2034 if (isInPrimaryFile() && LangOpts.IsHeaderFile) { 2035 // If the main file is a header, then it's either for PCH/AST generation, 2036 // or libclang opened it. Either way, handle it as a normal include below 2037 // and do not complain about include_next. 2038 } else if (isInPrimaryFile()) { 2039 Lookup = nullptr; 2040 Diag(IncludeNextTok, diag::pp_include_next_in_primary); 2041 } else if (CurLexerSubmodule) { 2042 // Start looking up in the directory *after* the one in which the current 2043 // file would be found, if any. 2044 assert(CurPPLexer && "#include_next directive in macro?"); 2045 if (auto FE = CurPPLexer->getFileEntry()) 2046 LookupFromFile = *FE; 2047 Lookup = nullptr; 2048 } else if (!Lookup) { 2049 // The current file was not found by walking the include path. Either it 2050 // is the primary file (handled above), or it was found by absolute path, 2051 // or it was found relative to such a file. 2052 // FIXME: Track enough information so we know which case we're in. 2053 Diag(IncludeNextTok, diag::pp_include_next_absolute_path); 2054 } else { 2055 // Start looking up in the next directory. 2056 ++Lookup; 2057 } 2058 2059 return {Lookup, LookupFromFile}; 2060 } 2061 2062 /// HandleIncludeDirective - The "\#include" tokens have just been read, read 2063 /// the file to be included from the lexer, then include it! 
This is a common 2064 /// routine with functionality shared between \#include, \#include_next and 2065 /// \#import. LookupFrom is set when this is a \#include_next directive, it 2066 /// specifies the file to start searching from. 2067 void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, 2068 Token &IncludeTok, 2069 ConstSearchDirIterator LookupFrom, 2070 const FileEntry *LookupFromFile) { 2071 Token FilenameTok; 2072 if (LexHeaderName(FilenameTok)) 2073 return; 2074 2075 if (FilenameTok.isNot(tok::header_name)) { 2076 Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); 2077 if (FilenameTok.isNot(tok::eod)) 2078 DiscardUntilEndOfDirective(); 2079 return; 2080 } 2081 2082 // Verify that there is nothing after the filename, other than EOD. Note 2083 // that we allow macros that expand to nothing after the filename, because 2084 // this falls into the category of "#include pp-tokens new-line" specified 2085 // in C99 6.10.2p4. 2086 SourceLocation EndLoc = 2087 CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true); 2088 2089 auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok, 2090 EndLoc, LookupFrom, LookupFromFile); 2091 switch (Action.Kind) { 2092 case ImportAction::None: 2093 case ImportAction::SkippedModuleImport: 2094 break; 2095 case ImportAction::ModuleBegin: 2096 EnterAnnotationToken(SourceRange(HashLoc, EndLoc), 2097 tok::annot_module_begin, Action.ModuleForHeader); 2098 break; 2099 case ImportAction::HeaderUnitImport: 2100 EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit, 2101 Action.ModuleForHeader); 2102 break; 2103 case ImportAction::ModuleImport: 2104 EnterAnnotationToken(SourceRange(HashLoc, EndLoc), 2105 tok::annot_module_include, Action.ModuleForHeader); 2106 break; 2107 case ImportAction::Failure: 2108 assert(TheModuleLoader.HadFatalFailure && 2109 "This should be an early exit only to a fatal error"); 2110 TheModuleLoader.HadFatalFailure = true; 2111 
IncludeTok.setKind(tok::eof); 2112 CurLexer->cutOffLexing(); 2113 return; 2114 } 2115 } 2116 2117 OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport( 2118 ConstSearchDirIterator *CurDir, StringRef &Filename, 2119 SourceLocation FilenameLoc, CharSourceRange FilenameRange, 2120 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, 2121 bool &IsMapped, ConstSearchDirIterator LookupFrom, 2122 const FileEntry *LookupFromFile, StringRef &LookupFilename, 2123 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath, 2124 ModuleMap::KnownHeader &SuggestedModule, bool isAngled) { 2125 auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) { 2126 if (LangOpts.AsmPreprocessor) 2127 return; 2128 2129 Module *RequestingModule = getModuleForLocation( 2130 FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes); 2131 bool RequestingModuleIsModuleInterface = 2132 !SourceMgr.isInMainFile(FilenameLoc); 2133 2134 HeaderInfo.getModuleMap().diagnoseHeaderInclusion( 2135 RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc, 2136 Filename, FE); 2137 }; 2138 2139 OptionalFileEntryRef File = LookupFile( 2140 FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir, 2141 Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr, 2142 &SuggestedModule, &IsMapped, &IsFrameworkFound); 2143 if (File) { 2144 DiagnoseHeaderInclusion(*File); 2145 return File; 2146 } 2147 2148 // Give the clients a chance to silently skip this include. 2149 if (Callbacks && Callbacks->FileNotFound(Filename)) 2150 return std::nullopt; 2151 2152 if (SuppressIncludeNotFoundError) 2153 return std::nullopt; 2154 2155 // If the file could not be located and it was included via angle 2156 // brackets, we can attempt a lookup as though it were a quoted path to 2157 // provide the user with a possible fixit. 
2158 if (isAngled) { 2159 OptionalFileEntryRef File = LookupFile( 2160 FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir, 2161 Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr, 2162 &SuggestedModule, &IsMapped, 2163 /*IsFrameworkFound=*/nullptr); 2164 if (File) { 2165 DiagnoseHeaderInclusion(*File); 2166 Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal) 2167 << Filename << IsImportDecl 2168 << FixItHint::CreateReplacement(FilenameRange, 2169 "\"" + Filename.str() + "\""); 2170 return File; 2171 } 2172 } 2173 2174 // Check for likely typos due to leading or trailing non-isAlphanumeric 2175 // characters 2176 StringRef OriginalFilename = Filename; 2177 if (LangOpts.SpellChecking) { 2178 // A heuristic to correct a typo file name by removing leading and 2179 // trailing non-isAlphanumeric characters. 2180 auto CorrectTypoFilename = [](llvm::StringRef Filename) { 2181 Filename = Filename.drop_until(isAlphanumeric); 2182 while (!Filename.empty() && !isAlphanumeric(Filename.back())) { 2183 Filename = Filename.drop_back(); 2184 } 2185 return Filename; 2186 }; 2187 StringRef TypoCorrectionName = CorrectTypoFilename(Filename); 2188 StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename); 2189 2190 OptionalFileEntryRef File = LookupFile( 2191 FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom, 2192 LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr, 2193 Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped, 2194 /*IsFrameworkFound=*/nullptr); 2195 if (File) { 2196 DiagnoseHeaderInclusion(*File); 2197 auto Hint = 2198 isAngled ? 
FixItHint::CreateReplacement( 2199 FilenameRange, "<" + TypoCorrectionName.str() + ">") 2200 : FixItHint::CreateReplacement( 2201 FilenameRange, "\"" + TypoCorrectionName.str() + "\""); 2202 Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal) 2203 << OriginalFilename << TypoCorrectionName << Hint; 2204 // We found the file, so set the Filename to the name after typo 2205 // correction. 2206 Filename = TypoCorrectionName; 2207 LookupFilename = TypoCorrectionLookupName; 2208 return File; 2209 } 2210 } 2211 2212 // If the file is still not found, just go with the vanilla diagnostic 2213 assert(!File && "expected missing file"); 2214 Diag(FilenameTok, diag::err_pp_file_not_found) 2215 << OriginalFilename << FilenameRange; 2216 if (IsFrameworkFound) { 2217 size_t SlashPos = OriginalFilename.find('/'); 2218 assert(SlashPos != StringRef::npos && 2219 "Include with framework name should have '/' in the filename"); 2220 StringRef FrameworkName = OriginalFilename.substr(0, SlashPos); 2221 FrameworkCacheEntry &CacheEntry = 2222 HeaderInfo.LookupFrameworkCache(FrameworkName); 2223 assert(CacheEntry.Directory && "Found framework should be in cache"); 2224 Diag(FilenameTok, diag::note_pp_framework_without_header) 2225 << OriginalFilename.substr(SlashPos + 1) << FrameworkName 2226 << CacheEntry.Directory->getName(); 2227 } 2228 2229 return std::nullopt; 2230 } 2231 2232 /// Handle either a #include-like directive or an import declaration that names 2233 /// a header file. 2234 /// 2235 /// \param HashLoc The location of the '#' token for an include, or 2236 /// SourceLocation() for an import declaration. 2237 /// \param IncludeTok The include / include_next / import token. 2238 /// \param FilenameTok The header-name token. 2239 /// \param EndLoc The location at which any imported macros become visible. 2240 /// \param LookupFrom For #include_next, the starting directory for the 2241 /// directory lookup. 
2242 /// \param LookupFromFile For #include_next, the starting file for the directory 2243 /// lookup. 2244 Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( 2245 SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok, 2246 SourceLocation EndLoc, ConstSearchDirIterator LookupFrom, 2247 const FileEntry *LookupFromFile) { 2248 SmallString<128> FilenameBuffer; 2249 StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); 2250 SourceLocation CharEnd = FilenameTok.getEndLoc(); 2251 2252 CharSourceRange FilenameRange 2253 = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd); 2254 StringRef OriginalFilename = Filename; 2255 bool isAngled = 2256 GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); 2257 2258 // If GetIncludeFilenameSpelling set the start ptr to null, there was an 2259 // error. 2260 if (Filename.empty()) 2261 return {ImportAction::None}; 2262 2263 bool IsImportDecl = HashLoc.isInvalid(); 2264 SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc; 2265 2266 // Complain about attempts to #include files in an audit pragma. 2267 if (PragmaARCCFCodeAuditedInfo.second.isValid()) { 2268 Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl; 2269 Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here); 2270 2271 // Immediately leave the pragma. 2272 PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()}; 2273 } 2274 2275 // Complain about attempts to #include files in an assume-nonnull pragma. 2276 if (PragmaAssumeNonNullLoc.isValid()) { 2277 Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl; 2278 Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here); 2279 2280 // Immediately leave the pragma. 2281 PragmaAssumeNonNullLoc = SourceLocation(); 2282 } 2283 2284 if (HeaderInfo.HasIncludeAliasMap()) { 2285 // Map the filename with the brackets still attached. 
If the name doesn't 2286 // map to anything, fall back on the filename we've already gotten the 2287 // spelling for. 2288 StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename); 2289 if (!NewName.empty()) 2290 Filename = NewName; 2291 } 2292 2293 // Search include directories. 2294 bool IsMapped = false; 2295 bool IsFrameworkFound = false; 2296 ConstSearchDirIterator CurDir = nullptr; 2297 SmallString<1024> SearchPath; 2298 SmallString<1024> RelativePath; 2299 // We get the raw path only if we have 'Callbacks' to which we later pass 2300 // the path. 2301 ModuleMap::KnownHeader SuggestedModule; 2302 SourceLocation FilenameLoc = FilenameTok.getLocation(); 2303 StringRef LookupFilename = Filename; 2304 2305 // Normalize slashes when compiling with -fms-extensions on non-Windows. This 2306 // is unnecessary on Windows since the filesystem there handles backslashes. 2307 SmallString<128> NormalizedPath; 2308 llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native; 2309 if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) { 2310 NormalizedPath = Filename.str(); 2311 llvm::sys::path::native(NormalizedPath); 2312 LookupFilename = NormalizedPath; 2313 BackslashStyle = llvm::sys::path::Style::windows; 2314 } 2315 2316 OptionalFileEntryRef File = LookupHeaderIncludeOrImport( 2317 &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok, 2318 IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile, 2319 LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled); 2320 2321 if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) { 2322 if (File && isPCHThroughHeader(&File->getFileEntry())) 2323 SkippingUntilPCHThroughHeader = false; 2324 return {ImportAction::None}; 2325 } 2326 2327 // Should we enter the source file? 
Set to Skip if either the source file is 2328 // known to have no effect beyond its effect on module visibility -- that is, 2329 // if it's got an include guard that is already defined, set to Import if it 2330 // is a modular header we've already built and should import. 2331 2332 // For C++20 Modules 2333 // [cpp.include]/7 If the header identified by the header-name denotes an 2334 // importable header, it is implementation-defined whether the #include 2335 // preprocessing directive is instead replaced by an import directive. 2336 // For this implementation, the translation is permitted when we are parsing 2337 // the Global Module Fragment, and not otherwise (the cases where it would be 2338 // valid to replace an include with an import are highly constrained once in 2339 // named module purview; this choice avoids considerable complexity in 2340 // determining valid cases). 2341 2342 enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter; 2343 2344 if (PPOpts->SingleFileParseMode) 2345 Action = IncludeLimitReached; 2346 2347 // If we've reached the max allowed include depth, it is usually due to an 2348 // include cycle. Don't enter already processed files again as it can lead to 2349 // reaching the max allowed include depth again. 2350 if (Action == Enter && HasReachedMaxIncludeDepth && File && 2351 alreadyIncluded(*File)) 2352 Action = IncludeLimitReached; 2353 2354 // FIXME: We do not have a good way to disambiguate C++ clang modules from 2355 // C++ standard modules (other than use/non-use of Header Units). 
2356 2357 Module *ModuleToImport = SuggestedModule.getModule(); 2358 2359 bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport && 2360 !ModuleToImport->isForBuilding(getLangOpts()); 2361 2362 // Maybe a usable Header Unit 2363 bool UsableHeaderUnit = false; 2364 if (getLangOpts().CPlusPlusModules && ModuleToImport && 2365 ModuleToImport->isHeaderUnit()) { 2366 if (TrackGMFState.inGMF() || IsImportDecl) 2367 UsableHeaderUnit = true; 2368 else if (!IsImportDecl) { 2369 // This is a Header Unit that we do not include-translate 2370 ModuleToImport = nullptr; 2371 } 2372 } 2373 // Maybe a usable clang header module. 2374 bool UsableClangHeaderModule = 2375 (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && 2376 ModuleToImport && !ModuleToImport->isHeaderUnit(); 2377 2378 // Determine whether we should try to import the module for this #include, if 2379 // there is one. Don't do so if precompiled module support is disabled or we 2380 // are processing this module textually (because we're building the module). 2381 if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) { 2382 // If this include corresponds to a module but that module is 2383 // unavailable, diagnose the situation and bail out. 2384 // FIXME: Remove this; loadModule does the same check (but produces 2385 // slightly worse diagnostics). 2386 if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport, 2387 getDiagnostics())) { 2388 Diag(FilenameTok.getLocation(), 2389 diag::note_implicit_top_level_module_import_here) 2390 << ModuleToImport->getTopLevelModuleName(); 2391 return {ImportAction::None}; 2392 } 2393 2394 // Compute the module access path corresponding to this module. 2395 // FIXME: Should we have a second loadModule() overload to avoid this 2396 // extra lookup step? 
2397 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path; 2398 for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent) 2399 Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name), 2400 FilenameTok.getLocation())); 2401 std::reverse(Path.begin(), Path.end()); 2402 2403 // Warn that we're replacing the include/import with a module import. 2404 if (!IsImportDecl) 2405 diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd); 2406 2407 // Load the module to import its macros. We'll make the declarations 2408 // visible when the parser gets here. 2409 // FIXME: Pass ModuleToImport in here rather than converting it to a path 2410 // and making the module loader convert it back again. 2411 ModuleLoadResult Imported = TheModuleLoader.loadModule( 2412 IncludeTok.getLocation(), Path, Module::Hidden, 2413 /*IsInclusionDirective=*/true); 2414 assert((Imported == nullptr || Imported == ModuleToImport) && 2415 "the imported module is different than the suggested one"); 2416 2417 if (Imported) { 2418 Action = Import; 2419 } else if (Imported.isMissingExpected()) { 2420 markClangModuleAsAffecting( 2421 static_cast<Module *>(Imported)->getTopLevelModule()); 2422 // We failed to find a submodule that we assumed would exist (because it 2423 // was in the directory of an umbrella header, for instance), but no 2424 // actual module containing it exists (because the umbrella header is 2425 // incomplete). Treat this as a textual inclusion. 2426 ModuleToImport = nullptr; 2427 } else if (Imported.isConfigMismatch()) { 2428 // On a configuration mismatch, enter the header textually. We still know 2429 // that it's part of the corresponding module. 2430 } else { 2431 // We hit an error processing the import. Bail out. 2432 if (hadModuleLoaderFatalFailure()) { 2433 // With a fatal failure in the module loader, we abort parsing. 
2434 Token &Result = IncludeTok; 2435 assert(CurLexer && "#include but no current lexer set!"); 2436 Result.startToken(); 2437 CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof); 2438 CurLexer->cutOffLexing(); 2439 } 2440 return {ImportAction::None}; 2441 } 2442 } 2443 2444 // The #included file will be considered to be a system header if either it is 2445 // in a system include directory, or if the #includer is a system include 2446 // header. 2447 SrcMgr::CharacteristicKind FileCharacter = 2448 SourceMgr.getFileCharacteristic(FilenameTok.getLocation()); 2449 if (File) 2450 FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter); 2451 2452 // If this is a '#import' or an import-declaration, don't re-enter the file. 2453 // 2454 // FIXME: If we have a suggested module for a '#include', and we've already 2455 // visited this file, don't bother entering it again. We know it has no 2456 // further effect. 2457 bool EnterOnce = 2458 IsImportDecl || 2459 IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import; 2460 2461 bool IsFirstIncludeOfFile = false; 2462 2463 // Ask HeaderInfo if we should enter this #include file. If not, #including 2464 // this file will have no effect. 2465 if (Action == Enter && File && 2466 !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce, 2467 getLangOpts().Modules, ModuleToImport, 2468 IsFirstIncludeOfFile)) { 2469 // C++ standard modules: 2470 // If we are not in the GMF, then we textually include only 2471 // clang modules: 2472 // Even if we've already preprocessed this header once and know that we 2473 // don't need to see its contents again, we still need to import it if it's 2474 // modular because we might not have imported it from this submodule before. 2475 // 2476 // FIXME: We don't do this when compiling a PCH because the AST 2477 // serialization layer can't cope with it. This means we get local 2478 // submodule visibility semantics wrong in that case. 
2479 if (UsableHeaderUnit && !getLangOpts().CompilingPCH) 2480 Action = TrackGMFState.inGMF() ? Import : Skip; 2481 else 2482 Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip; 2483 } 2484 2485 // Check for circular inclusion of the main file. 2486 // We can't generate a consistent preamble with regard to the conditional 2487 // stack if the main file is included again as due to the preamble bounds 2488 // some directives (e.g. #endif of a header guard) will never be seen. 2489 // Since this will lead to confusing errors, avoid the inclusion. 2490 if (Action == Enter && File && PreambleConditionalStack.isRecording() && 2491 SourceMgr.isMainFile(File->getFileEntry())) { 2492 Diag(FilenameTok.getLocation(), 2493 diag::err_pp_including_mainfile_in_preamble); 2494 return {ImportAction::None}; 2495 } 2496 2497 if (Callbacks && !IsImportDecl) { 2498 // Notify the callback object that we've seen an inclusion directive. 2499 // FIXME: Use a different callback for a pp-import? 2500 Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled, 2501 FilenameRange, File, SearchPath, RelativePath, 2502 SuggestedModule.getModule(), Action == Import, 2503 FileCharacter); 2504 if (Action == Skip && File) 2505 Callbacks->FileSkipped(*File, FilenameTok, FileCharacter); 2506 } 2507 2508 if (!File) 2509 return {ImportAction::None}; 2510 2511 // If this is a C++20 pp-import declaration, diagnose if we didn't find any 2512 // module corresponding to the named header. 2513 if (IsImportDecl && !ModuleToImport) { 2514 Diag(FilenameTok, diag::err_header_import_not_header_unit) 2515 << OriginalFilename << File->getName(); 2516 return {ImportAction::None}; 2517 } 2518 2519 // Issue a diagnostic if the name of the file on disk has a different case 2520 // than the one we're about to open. 
2521 const bool CheckIncludePathPortability = 2522 !IsMapped && !File->getFileEntry().tryGetRealPathName().empty(); 2523 2524 if (CheckIncludePathPortability) { 2525 StringRef Name = LookupFilename; 2526 StringRef NameWithoriginalSlashes = Filename; 2527 #if defined(_WIN32) 2528 // Skip UNC prefix if present. (tryGetRealPathName() always 2529 // returns a path with the prefix skipped.) 2530 bool NameWasUNC = Name.consume_front("\\\\?\\"); 2531 NameWithoriginalSlashes.consume_front("\\\\?\\"); 2532 #endif 2533 StringRef RealPathName = File->getFileEntry().tryGetRealPathName(); 2534 SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name), 2535 llvm::sys::path::end(Name)); 2536 #if defined(_WIN32) 2537 // -Wnonportable-include-path is designed to diagnose includes using 2538 // case even on systems with a case-insensitive file system. 2539 // On Windows, RealPathName always starts with an upper-case drive 2540 // letter for absolute paths, but Name might start with either 2541 // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell. 2542 // ("foo" will always have on-disk case, no matter which case was 2543 // used in the cd command). To not emit this warning solely for 2544 // the drive letter, whose case is dependent on if `cd` is used 2545 // with upper- or lower-case drive letters, always consider the 2546 // given drive letter case as correct for the purpose of this warning. 
2547 SmallString<128> FixedDriveRealPath; 2548 if (llvm::sys::path::is_absolute(Name) && 2549 llvm::sys::path::is_absolute(RealPathName) && 2550 toLowercase(Name[0]) == toLowercase(RealPathName[0]) && 2551 isLowercase(Name[0]) != isLowercase(RealPathName[0])) { 2552 assert(Components.size() >= 3 && "should have drive, backslash, name"); 2553 assert(Components[0].size() == 2 && "should start with drive"); 2554 assert(Components[0][1] == ':' && "should have colon"); 2555 FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str(); 2556 RealPathName = FixedDriveRealPath; 2557 } 2558 #endif 2559 2560 if (trySimplifyPath(Components, RealPathName, BackslashStyle)) { 2561 SmallString<128> Path; 2562 Path.reserve(Name.size()+2); 2563 Path.push_back(isAngled ? '<' : '"'); 2564 2565 const auto IsSep = [BackslashStyle](char c) { 2566 return llvm::sys::path::is_separator(c, BackslashStyle); 2567 }; 2568 2569 for (auto Component : Components) { 2570 // On POSIX, Components will contain a single '/' as first element 2571 // exactly if Name is an absolute path. 2572 // On Windows, it will contain "C:" followed by '\' for absolute paths. 2573 // The drive letter is optional for absolute paths on Windows, but 2574 // clang currently cannot process absolute paths in #include lines that 2575 // don't have a drive. 2576 // If the first entry in Components is a directory separator, 2577 // then the code at the bottom of this loop that keeps the original 2578 // directory separator style copies it. If the second entry is 2579 // a directory separator (the C:\ case), then that separator already 2580 // got copied when the C: was processed and we want to skip that entry. 2581 if (!(Component.size() == 1 && IsSep(Component[0]))) 2582 Path.append(Component); 2583 else if (Path.size() != 1) 2584 continue; 2585 2586 // Append the separator(s) the user used, or the close quote 2587 if (Path.size() > NameWithoriginalSlashes.size()) { 2588 Path.push_back(isAngled ? 
'>' : '"'); 2589 continue; 2590 } 2591 assert(IsSep(NameWithoriginalSlashes[Path.size()-1])); 2592 do 2593 Path.push_back(NameWithoriginalSlashes[Path.size()-1]); 2594 while (Path.size() <= NameWithoriginalSlashes.size() && 2595 IsSep(NameWithoriginalSlashes[Path.size()-1])); 2596 } 2597 2598 #if defined(_WIN32) 2599 // Restore UNC prefix if it was there. 2600 if (NameWasUNC) 2601 Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str(); 2602 #endif 2603 2604 // For user files and known standard headers, issue a diagnostic. 2605 // For other system headers, don't. They can be controlled separately. 2606 auto DiagId = 2607 (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) 2608 ? diag::pp_nonportable_path 2609 : diag::pp_nonportable_system_path; 2610 Diag(FilenameTok, DiagId) << Path << 2611 FixItHint::CreateReplacement(FilenameRange, Path); 2612 } 2613 } 2614 2615 switch (Action) { 2616 case Skip: 2617 // If we don't need to enter the file, stop now. 2618 if (ModuleToImport) 2619 return {ImportAction::SkippedModuleImport, ModuleToImport}; 2620 return {ImportAction::None}; 2621 2622 case IncludeLimitReached: 2623 // If we reached our include limit and don't want to enter any more files, 2624 // don't go any further. 2625 return {ImportAction::None}; 2626 2627 case Import: { 2628 // If this is a module import, make it visible if needed. 2629 assert(ModuleToImport && "no module to import"); 2630 2631 makeModuleVisible(ModuleToImport, EndLoc); 2632 2633 if (IncludeTok.getIdentifierInfo()->getPPKeywordID() == 2634 tok::pp___include_macros) 2635 return {ImportAction::None}; 2636 2637 return {ImportAction::ModuleImport, ModuleToImport}; 2638 } 2639 2640 case Enter: 2641 break; 2642 } 2643 2644 // Check that we don't have infinite #include recursion. 
2645 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) { 2646 Diag(FilenameTok, diag::err_pp_include_too_deep); 2647 HasReachedMaxIncludeDepth = true; 2648 return {ImportAction::None}; 2649 } 2650 2651 if (isAngled && isInNamedModule()) 2652 Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview) 2653 << getNamedModuleName(); 2654 2655 // Look up the file, create a File ID for it. 2656 SourceLocation IncludePos = FilenameTok.getLocation(); 2657 // If the filename string was the result of macro expansions, set the include 2658 // position on the file where it will be included and after the expansions. 2659 if (IncludePos.isMacroID()) 2660 IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd(); 2661 FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter); 2662 if (!FID.isValid()) { 2663 TheModuleLoader.HadFatalFailure = true; 2664 return ImportAction::Failure; 2665 } 2666 2667 // If all is good, enter the new file! 2668 if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(), 2669 IsFirstIncludeOfFile)) 2670 return {ImportAction::None}; 2671 2672 // Determine if we're switching to building a new submodule, and which one. 2673 // This does not apply for C++20 modules header units. 2674 if (ModuleToImport && !ModuleToImport->isHeaderUnit()) { 2675 if (ModuleToImport->getTopLevelModule()->ShadowingModule) { 2676 // We are building a submodule that belongs to a shadowed module. This 2677 // means we find header files in the shadowed module. 2678 Diag(ModuleToImport->DefinitionLoc, 2679 diag::err_module_build_shadowed_submodule) 2680 << ModuleToImport->getFullModuleName(); 2681 Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc, 2682 diag::note_previous_definition); 2683 return {ImportAction::None}; 2684 } 2685 // When building a pch, -fmodule-name tells the compiler to textually 2686 // include headers in the specified module. We are not building the 2687 // specified module. 
2688 // 2689 // FIXME: This is the wrong way to handle this. We should produce a PCH 2690 // that behaves the same as the header would behave in a compilation using 2691 // that PCH, which means we should enter the submodule. We need to teach 2692 // the AST serialization layer to deal with the resulting AST. 2693 if (getLangOpts().CompilingPCH && 2694 ModuleToImport->isForBuilding(getLangOpts())) 2695 return {ImportAction::None}; 2696 2697 assert(!CurLexerSubmodule && "should not have marked this as a module yet"); 2698 CurLexerSubmodule = ModuleToImport; 2699 2700 // Let the macro handling code know that any future macros are within 2701 // the new submodule. 2702 EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false); 2703 2704 // Let the parser know that any future declarations are within the new 2705 // submodule. 2706 // FIXME: There's no point doing this if we're handling a #__include_macros 2707 // directive. 2708 return {ImportAction::ModuleBegin, ModuleToImport}; 2709 } 2710 2711 assert(!IsImportDecl && "failed to diagnose missing module for import decl"); 2712 return {ImportAction::None}; 2713 } 2714 2715 /// HandleIncludeNextDirective - Implements \#include_next. 2716 /// 2717 void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc, 2718 Token &IncludeNextTok) { 2719 Diag(IncludeNextTok, diag::ext_pp_include_next_directive); 2720 2721 ConstSearchDirIterator Lookup = nullptr; 2722 const FileEntry *LookupFromFile; 2723 std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok); 2724 2725 return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup, 2726 LookupFromFile); 2727 } 2728 2729 /// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode 2730 void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) { 2731 // The Microsoft #import directive takes a type library and generates header 2732 // files from it, and includes those. 
This is beyond the scope of what clang 2733 // does, so we ignore it and error out. However, #import can optionally have 2734 // trailing attributes that span multiple lines. We're going to eat those 2735 // so we can continue processing from there. 2736 Diag(Tok, diag::err_pp_import_directive_ms ); 2737 2738 // Read tokens until we get to the end of the directive. Note that the 2739 // directive can be split over multiple lines using the backslash character. 2740 DiscardUntilEndOfDirective(); 2741 } 2742 2743 /// HandleImportDirective - Implements \#import. 2744 /// 2745 void Preprocessor::HandleImportDirective(SourceLocation HashLoc, 2746 Token &ImportTok) { 2747 if (!LangOpts.ObjC) { // #import is standard for ObjC. 2748 if (LangOpts.MSVCCompat) 2749 return HandleMicrosoftImportDirective(ImportTok); 2750 Diag(ImportTok, diag::ext_pp_import_directive); 2751 } 2752 return HandleIncludeDirective(HashLoc, ImportTok); 2753 } 2754 2755 /// HandleIncludeMacrosDirective - The -imacros command line option turns into a 2756 /// pseudo directive in the predefines buffer. This handles it by sucking all 2757 /// tokens through the preprocessor and discarding them (only keeping the side 2758 /// effects on the preprocessor). 2759 void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc, 2760 Token &IncludeMacrosTok) { 2761 // This directive should only occur in the predefines buffer. If not, emit an 2762 // error and reject it. 2763 SourceLocation Loc = IncludeMacrosTok.getLocation(); 2764 if (SourceMgr.getBufferName(Loc) != "<built-in>") { 2765 Diag(IncludeMacrosTok.getLocation(), 2766 diag::pp_include_macros_out_of_predefines); 2767 DiscardUntilEndOfDirective(); 2768 return; 2769 } 2770 2771 // Treat this as a normal #include for checking purposes. If this is 2772 // successful, it will push a new lexer onto the include stack. 
/// ReadMacroParameterList - The ( starting a parameter list of a macro
/// definition has just been read.  Lex the rest of the parameters and the
/// closing ), updating MI with what we learn.  Return true if an error occurs
/// parsing the param list.
///
/// \param MI  Macro being defined. On success its parameter list is set
///            (except for the empty list '()', where nothing is recorded) and
///            it is flagged as C99- or GNU-style variadic when applicable.
/// \param Tok Scratch token used for lexing; on return it holds the last
///            token that was examined.
/// \returns false on success, true after a diagnostic has been emitted.
bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
  // Parameters collected so far, in declaration order.
  SmallVector<IdentifierInfo*, 32> Parameters;

  // Each iteration lexes one parameter (or a terminator). The loop only
  // continues after a ',' has been seen following an identifier.
  while (true) {
    LexUnexpandedNonComment(Tok);
    switch (Tok.getKind()) {
    case tok::r_paren:
      // Found the end of the parameter list.
      if (Parameters.empty())  // #define FOO()
        return false;
      // Otherwise we have #define FOO(A,) -- a ')' directly after a comma.
      Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
      return true;
    case tok::ellipsis:  // #define X(... -> C99 varargs
      // Outside C99 this is either a C++98-compatibility warning (C++11 and
      // later) or an extension.
      if (!LangOpts.C99)
        Diag(Tok, LangOpts.CPlusPlus11 ?
             diag::warn_cxx98_compat_variadic_macro :
             diag::ext_variadic_macro);

      // OpenCL v1.2 s6.9.e: variadic macros are not supported.
      if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
        Diag(Tok, diag::ext_pp_opencl_variadic_macros);
      }

      // Lex the token after the ellipsis; it must close the list.
      LexUnexpandedNonComment(Tok);
      if (Tok.isNot(tok::r_paren)) {
        Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
        return true;
      }
      // Add the __VA_ARGS__ identifier as a parameter.
      Parameters.push_back(Ident__VA_ARGS__);
      MI->setIsC99Varargs();
      MI->setParameterList(Parameters, BP);
      return false;
    case tok::eod:  // #define X(
      Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
      return true;
    default:
      // Handle keywords and identifiers here to accept things like
      // #define Foo(for) for.
      IdentifierInfo *II = Tok.getIdentifierInfo();
      if (!II) {
        // #define X(1
        Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
        return true;
      }

      // If this is already used as a parameter, it is used multiple times
      // (e.g. #define X(A,A) ).
      if (llvm::is_contained(Parameters, II)) {  // C99 6.10.3p6
        Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
        return true;
      }

      // Add the parameter to the macro info.
      Parameters.push_back(II);

      // Lex the token after the identifier.
      LexUnexpandedNonComment(Tok);

      switch (Tok.getKind()) {
      default:          // #define X(A B
        Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
        return true;
      case tok::r_paren: // #define X(A)
        MI->setParameterList(Parameters, BP);
        return false;
      case tok::comma:  // #define X(A,
        // Leave the inner switch and go around the loop for the next
        // parameter.
        break;
      case tok::ellipsis:  // #define X(A... -> GCC extension
        // Diagnose extension.
        Diag(Tok, diag::ext_named_variadic_macro);

        // Lex the token after the ellipsis; it must close the list.
        LexUnexpandedNonComment(Tok);
        if (Tok.isNot(tok::r_paren)) {
          Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
          return true;
        }

        // The named parameter just pushed is the variadic one.
        MI->setIsGNUVarargs();
        MI->setParameterList(Parameters, BP);
        return false;
      }
    }
  }
}
2885 if (MacroName.getKind() == Value.getKind()) 2886 return true; 2887 2888 // Macro that maps a keyword to the same keyword decorated with leading/ 2889 // trailing underscores is a valid pattern: 2890 // #define inline __inline 2891 // #define inline __inline__ 2892 // #define inline _inline (in MS compatibility mode) 2893 StringRef MacroText = MacroName.getIdentifierInfo()->getName(); 2894 if (IdentifierInfo *II = Value.getIdentifierInfo()) { 2895 if (!II->isKeyword(LOptions)) 2896 return false; 2897 StringRef ValueText = II->getName(); 2898 StringRef TrimmedValue = ValueText; 2899 if (!ValueText.starts_with("__")) { 2900 if (ValueText.starts_with("_")) 2901 TrimmedValue = TrimmedValue.drop_front(1); 2902 else 2903 return false; 2904 } else { 2905 TrimmedValue = TrimmedValue.drop_front(2); 2906 if (TrimmedValue.ends_with("__")) 2907 TrimmedValue = TrimmedValue.drop_back(2); 2908 } 2909 return TrimmedValue == MacroText; 2910 } else { 2911 return false; 2912 } 2913 } 2914 2915 // #define inline 2916 return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static, 2917 tok::kw_const) && 2918 MI->getNumTokens() == 0; 2919 } 2920 2921 // ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the 2922 // entire line) of the macro's tokens and adds them to MacroInfo, and while 2923 // doing so performs certain validity checks including (but not limited to): 2924 // - # (stringization) is followed by a macro parameter 2925 // 2926 // Returns a nullptr if an invalid sequence of tokens is encountered or returns 2927 // a pointer to a MacroInfo object. 2928 2929 MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( 2930 const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) { 2931 2932 Token LastTok = MacroNameTok; 2933 // Create the new macro. 
2934 MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation()); 2935 2936 Token Tok; 2937 LexUnexpandedToken(Tok); 2938 2939 // Ensure we consume the rest of the macro body if errors occur. 2940 auto _ = llvm::make_scope_exit([&]() { 2941 // The flag indicates if we are still waiting for 'eod'. 2942 if (CurLexer->ParsingPreprocessorDirective) 2943 DiscardUntilEndOfDirective(); 2944 }); 2945 2946 // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk 2947 // within their appropriate context. 2948 VariadicMacroScopeGuard VariadicMacroScopeGuard(*this); 2949 2950 // If this is a function-like macro definition, parse the argument list, 2951 // marking each of the identifiers as being used as macro arguments. Also, 2952 // check other constraints on the first token of the macro body. 2953 if (Tok.is(tok::eod)) { 2954 if (ImmediatelyAfterHeaderGuard) { 2955 // Save this macro information since it may part of a header guard. 2956 CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(), 2957 MacroNameTok.getLocation()); 2958 } 2959 // If there is no body to this macro, we have no special handling here. 2960 } else if (Tok.hasLeadingSpace()) { 2961 // This is a normal token with leading space. Clear the leading space 2962 // marker on the first token to get proper expansion. 2963 Tok.clearFlag(Token::LeadingSpace); 2964 } else if (Tok.is(tok::l_paren)) { 2965 // This is a function-like macro definition. Read the argument list. 2966 MI->setIsFunctionLike(); 2967 if (ReadMacroParameterList(MI, LastTok)) 2968 return nullptr; 2969 2970 // If this is a definition of an ISO C/C++ variadic function-like macro (not 2971 // using the GNU named varargs extension) inform our variadic scope guard 2972 // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__) 2973 // allowed only within the definition of a variadic macro. 
2974 2975 if (MI->isC99Varargs()) { 2976 VariadicMacroScopeGuard.enterScope(); 2977 } 2978 2979 // Read the first token after the arg list for down below. 2980 LexUnexpandedToken(Tok); 2981 } else if (LangOpts.C99 || LangOpts.CPlusPlus11) { 2982 // C99 requires whitespace between the macro definition and the body. Emit 2983 // a diagnostic for something like "#define X+". 2984 Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name); 2985 } else { 2986 // C90 6.8 TC1 says: "In the definition of an object-like macro, if the 2987 // first character of a replacement list is not a character required by 2988 // subclause 5.2.1, then there shall be white-space separation between the 2989 // identifier and the replacement list.". 5.2.1 lists this set: 2990 // "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which 2991 // is irrelevant here. 2992 bool isInvalid = false; 2993 if (Tok.is(tok::at)) // @ is not in the list above. 2994 isInvalid = true; 2995 else if (Tok.is(tok::unknown)) { 2996 // If we have an unknown token, it is something strange like "`". Since 2997 // all of valid characters would have lexed into a single character 2998 // token of some sort, we know this is not a valid case. 2999 isInvalid = true; 3000 } 3001 if (isInvalid) 3002 Diag(Tok, diag::ext_missing_whitespace_after_macro_name); 3003 else 3004 Diag(Tok, diag::warn_missing_whitespace_after_macro_name); 3005 } 3006 3007 if (!Tok.is(tok::eod)) 3008 LastTok = Tok; 3009 3010 SmallVector<Token, 16> Tokens; 3011 3012 // Read the rest of the macro body. 3013 if (MI->isObjectLike()) { 3014 // Object-like macros are very simple, just read their body. 3015 while (Tok.isNot(tok::eod)) { 3016 LastTok = Tok; 3017 Tokens.push_back(Tok); 3018 // Get the next token of the macro. 3019 LexUnexpandedToken(Tok); 3020 } 3021 } else { 3022 // Otherwise, read the body of a function-like macro. 
While we are at it, 3023 // check C99 6.10.3.2p1: ensure that # operators are followed by macro 3024 // parameters in function-like macro expansions. 3025 3026 VAOptDefinitionContext VAOCtx(*this); 3027 3028 while (Tok.isNot(tok::eod)) { 3029 LastTok = Tok; 3030 3031 if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) { 3032 Tokens.push_back(Tok); 3033 3034 if (VAOCtx.isVAOptToken(Tok)) { 3035 // If we're already within a VAOPT, emit an error. 3036 if (VAOCtx.isInVAOpt()) { 3037 Diag(Tok, diag::err_pp_vaopt_nested_use); 3038 return nullptr; 3039 } 3040 // Ensure VAOPT is followed by a '(' . 3041 LexUnexpandedToken(Tok); 3042 if (Tok.isNot(tok::l_paren)) { 3043 Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use); 3044 return nullptr; 3045 } 3046 Tokens.push_back(Tok); 3047 VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation()); 3048 LexUnexpandedToken(Tok); 3049 if (Tok.is(tok::hashhash)) { 3050 Diag(Tok, diag::err_vaopt_paste_at_start); 3051 return nullptr; 3052 } 3053 continue; 3054 } else if (VAOCtx.isInVAOpt()) { 3055 if (Tok.is(tok::r_paren)) { 3056 if (VAOCtx.sawClosingParen()) { 3057 assert(Tokens.size() >= 3 && 3058 "Must have seen at least __VA_OPT__( " 3059 "and a subsequent tok::r_paren"); 3060 if (Tokens[Tokens.size() - 2].is(tok::hashhash)) { 3061 Diag(Tok, diag::err_vaopt_paste_at_end); 3062 return nullptr; 3063 } 3064 } 3065 } else if (Tok.is(tok::l_paren)) { 3066 VAOCtx.sawOpeningParen(Tok.getLocation()); 3067 } 3068 } 3069 // Get the next token of the macro. 3070 LexUnexpandedToken(Tok); 3071 continue; 3072 } 3073 3074 // If we're in -traditional mode, then we should ignore stringification 3075 // and token pasting. Mark the tokens as unknown so as not to confuse 3076 // things. 3077 if (getLangOpts().TraditionalCPP) { 3078 Tok.setKind(tok::unknown); 3079 Tokens.push_back(Tok); 3080 3081 // Get the next token of the macro. 
3082 LexUnexpandedToken(Tok); 3083 continue; 3084 } 3085 3086 if (Tok.is(tok::hashhash)) { 3087 // If we see token pasting, check if it looks like the gcc comma 3088 // pasting extension. We'll use this information to suppress 3089 // diagnostics later on. 3090 3091 // Get the next token of the macro. 3092 LexUnexpandedToken(Tok); 3093 3094 if (Tok.is(tok::eod)) { 3095 Tokens.push_back(LastTok); 3096 break; 3097 } 3098 3099 if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ && 3100 Tokens[Tokens.size() - 1].is(tok::comma)) 3101 MI->setHasCommaPasting(); 3102 3103 // Things look ok, add the '##' token to the macro. 3104 Tokens.push_back(LastTok); 3105 continue; 3106 } 3107 3108 // Our Token is a stringization operator. 3109 // Get the next token of the macro. 3110 LexUnexpandedToken(Tok); 3111 3112 // Check for a valid macro arg identifier or __VA_OPT__. 3113 if (!VAOCtx.isVAOptToken(Tok) && 3114 (Tok.getIdentifierInfo() == nullptr || 3115 MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) { 3116 3117 // If this is assembler-with-cpp mode, we accept random gibberish after 3118 // the '#' because '#' is often a comment character. However, change 3119 // the kind of the token to tok::unknown so that the preprocessor isn't 3120 // confused. 3121 if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) { 3122 LastTok.setKind(tok::unknown); 3123 Tokens.push_back(LastTok); 3124 continue; 3125 } else { 3126 Diag(Tok, diag::err_pp_stringize_not_parameter) 3127 << LastTok.is(tok::hashat); 3128 return nullptr; 3129 } 3130 } 3131 3132 // Things look ok, add the '#' and param name tokens to the macro. 3133 Tokens.push_back(LastTok); 3134 3135 // If the token following '#' is VAOPT, let the next iteration handle it 3136 // and check it for correctness, otherwise add the token and prime the 3137 // loop with the next one. 3138 if (!VAOCtx.isVAOptToken(Tok)) { 3139 Tokens.push_back(Tok); 3140 LastTok = Tok; 3141 3142 // Get the next token of the macro. 
/// Returns true if \p II names one of the Objective-C ownership qualifier
/// macros (__strong, __weak, __unsafe_unretained, __autoreleasing).
static bool isObjCProtectedMacro(const IdentifierInfo *II) {
  return II->isStr("__strong") || II->isStr("__weak") ||
         II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
}

/// HandleDefineDirective - Implements \#define.  This consumes the entire macro
/// line then lets the caller lex the next real token.
///
/// The macro name and body are read, a series of diagnostics is applied
/// (keyword hiding, '##' at either end of the body, PCH mismatch,
/// redefinition of an existing or builtin macro), and if none of the checks
/// bails out the definition is appended to the identifier's macro history and
/// reported to the callbacks.
///
/// \param DefineTok The 'define' directive token. In this function it is only
///        used for its location, to decide whether the directive sits in a
///        system header.
/// \param ImmediatelyAfterHeaderGuard Forwarded unchanged to
///        ReadOptionalMacroParameterListAndBody().
void Preprocessor::HandleDefineDirective(
    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
  ++NumDefined;

  Token MacroNameTok;
  // Declared without an initializer; a pointer to it is handed to
  // ReadMacroName() -- presumably that call always assigns it. TODO confirm.
  bool MacroShadowsKeyword;
  ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
  // Issue a final pragma warning if we're defining a macro that has been
  // undefined and is being redefined.
  if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
    emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

  // If we are supposed to keep comments in #defines, reenable comment saving
  // mode.
  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);

  MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
      MacroNameTok, ImmediatelyAfterHeaderGuard);

  // A null result means the body was rejected; nothing is defined.
  if (!MI) return;

  if (MacroShadowsKeyword &&
      !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
    Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
  }
  // Check that there is no paste (##) operator at the beginning or end of the
  // replacement list.  Either case is an error and the macro is not defined.
  unsigned NumTokens = MI->getNumTokens();
  if (NumTokens != 0) {
    if (MI->getReplacementToken(0).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
      return;
    }
    if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
      return;
    }
  }

  // When skipping just warn about macros that do not match.
  if (SkippingUntilPCHThroughHeader) {
    const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
    if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt))
      Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
          << MacroNameTok.getIdentifierInfo();
    // Issue the diagnostic but allow the change if msvc extensions are enabled
    if (!LangOpts.MicrosoftExt)
      return;
  }

  // Finally, if this identifier already had a macro defined for it, verify that
  // the macro bodies are identical, and issue diagnostics if they are not.
  if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
    // Final macros are hard-mode: they always warn. Even if the bodies are
    // identical. Even if they are in system headers. Even if they are things we
    // would silently allow in the past.
    if (MacroNameTok.getIdentifierInfo()->isFinal())
      emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

    // In Objective-C, ignore attempts to directly redefine the builtin
    // definitions of the ownership qualifiers.  It's still possible to
    // #undef them.
    if (getLangOpts().ObjC &&
        SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
            getPredefinesFileID() &&
        isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
      // Warn if it changes the tokens.
      if ((!getDiagnostics().getSuppressSystemWarnings() ||
           !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
          !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
      }
      assert(!OtherMI->isWarnIfUnused());
      // The redefinition is dropped: we return before the directive is
      // appended below.
      return;
    }

    // It is very common for system headers to have tons of macro redefinitions
    // and for warnings to be disabled in system headers.  If this is the case,
    // then don't bother calling MacroInfo::isIdenticalTo.
    if (!getDiagnostics().getSuppressSystemWarnings() ||
        !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {

      // The previous definition is about to be replaced without ever having
      // been used.
      if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
        Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);

      // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
      // C++ [cpp.predefined]p4, but allow it as an extension.
      if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
        Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
      // Macros must be identical.  This means all tokens and whitespace
      // separation must be the same.  C99 6.10.3p2.
      else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
               !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
          << MacroNameTok.getIdentifierInfo();
        Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
      }
    }
    // The old definition no longer needs unused-macro tracking.
    if (OtherMI->isWarnIfUnused())
      WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
  }

  // All checks passed: record the new definition.
  DefMacroDirective *MD =
      appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);

  assert(!MI->isUsed());
  // If we need warning for not using the macro, add its location in the
  // warn-because-unused-macro set. If it gets used it will be removed from set.
  if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
      !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
      !MacroExpansionInDirectivesOverride &&
      getSourceManager().getFileID(MI->getDefinitionLoc()) !=
          getPredefinesFileID()) {
    MI->setIsWarnIfUnused(true);
    WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
  }

  // If the callbacks want to know, tell them about the macro definition.
  if (Callbacks)
    Callbacks->MacroDefined(MacroNameTok, MD);

  // If we're in MS compatibility mode and the macro being defined is the
  // assert macro, implicitly add a macro definition for static_assert to work
  // around their broken assert.h header file in C. Only do so if there isn't
  // already a static_assert macro defined.
  if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
      MacroNameTok.getIdentifierInfo()->isStr("assert") &&
      !isMacroDefined("static_assert")) {
    // Note: this MI intentionally shadows the outer one; it describes the
    // synthesized 'static_assert' macro, whose body is the single token
    // '_Static_assert'.
    MacroInfo *MI = AllocateMacroInfo(SourceLocation());

    Token Tok;
    Tok.startToken();
    Tok.setKind(tok::kw__Static_assert);
    Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
    MI->setTokens({Tok}, BP);
    (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
  }
}
3354 // Note: no matter if the macro was defined or not. 3355 if (Callbacks) 3356 Callbacks->MacroUndefined(MacroNameTok, MD, Undef); 3357 3358 if (Undef) 3359 appendMacroDirective(II, Undef); 3360 } 3361 3362 //===----------------------------------------------------------------------===// 3363 // Preprocessor Conditional Directive Handling. 3364 //===----------------------------------------------------------------------===// 3365 3366 /// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef 3367 /// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is 3368 /// true if any tokens have been returned or pp-directives activated before this 3369 /// \#ifndef has been lexed. 3370 /// 3371 void Preprocessor::HandleIfdefDirective(Token &Result, 3372 const Token &HashToken, 3373 bool isIfndef, 3374 bool ReadAnyTokensBeforeDirective) { 3375 ++NumIf; 3376 Token DirectiveTok = Result; 3377 3378 Token MacroNameTok; 3379 ReadMacroName(MacroNameTok); 3380 3381 // Error reading macro name? If so, diagnostic already issued. 3382 if (MacroNameTok.is(tok::eod)) { 3383 // Skip code until we get to #endif. This helps with recovery by not 3384 // emitting an error when the #endif is reached. 3385 SkipExcludedConditionalBlock(HashToken.getLocation(), 3386 DirectiveTok.getLocation(), 3387 /*Foundnonskip*/ false, /*FoundElse*/ false); 3388 return; 3389 } 3390 3391 emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true); 3392 3393 // Check to see if this is the last token on the #if[n]def line. 3394 CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef"); 3395 3396 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); 3397 auto MD = getMacroDefinition(MII); 3398 MacroInfo *MI = MD.getMacroInfo(); 3399 3400 if (CurPPLexer->getConditionalStackDepth() == 0) { 3401 // If the start of a top-level #ifdef and if the macro is not defined, 3402 // inform MIOpt that this might be the start of a proper include guard. 
3403 // Otherwise it is some other form of unknown conditional which we can't 3404 // handle. 3405 if (!ReadAnyTokensBeforeDirective && !MI) { 3406 assert(isIfndef && "#ifdef shouldn't reach here"); 3407 CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation()); 3408 } else 3409 CurPPLexer->MIOpt.EnterTopLevelConditional(); 3410 } 3411 3412 // If there is a macro, process it. 3413 if (MI) // Mark it used. 3414 markMacroAsUsed(MI); 3415 3416 if (Callbacks) { 3417 if (isIfndef) 3418 Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD); 3419 else 3420 Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD); 3421 } 3422 3423 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks && 3424 getSourceManager().isInMainFile(DirectiveTok.getLocation()); 3425 3426 // Should we include the stuff contained by this directive? 3427 if (PPOpts->SingleFileParseMode && !MI) { 3428 // In 'single-file-parse mode' undefined identifiers trigger parsing of all 3429 // the directive blocks. 3430 CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(), 3431 /*wasskip*/false, /*foundnonskip*/false, 3432 /*foundelse*/false); 3433 } else if (!MI == isIfndef || RetainExcludedCB) { 3434 // Yes, remember that we are inside a conditional, then lex the next token. 3435 CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(), 3436 /*wasskip*/false, /*foundnonskip*/true, 3437 /*foundelse*/false); 3438 } else { 3439 // No, skip the contents of this block. 3440 SkipExcludedConditionalBlock(HashToken.getLocation(), 3441 DirectiveTok.getLocation(), 3442 /*Foundnonskip*/ false, 3443 /*FoundElse*/ false); 3444 } 3445 } 3446 3447 /// HandleIfDirective - Implements the \#if directive. 3448 /// 3449 void Preprocessor::HandleIfDirective(Token &IfToken, 3450 const Token &HashToken, 3451 bool ReadAnyTokensBeforeDirective) { 3452 ++NumIf; 3453 3454 // Parse and evaluate the conditional expression. 
3455 IdentifierInfo *IfNDefMacro = nullptr; 3456 const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro); 3457 const bool ConditionalTrue = DER.Conditional; 3458 // Lexer might become invalid if we hit code completion point while evaluating 3459 // expression. 3460 if (!CurPPLexer) 3461 return; 3462 3463 // If this condition is equivalent to #ifndef X, and if this is the first 3464 // directive seen, handle it for the multiple-include optimization. 3465 if (CurPPLexer->getConditionalStackDepth() == 0) { 3466 if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue) 3467 // FIXME: Pass in the location of the macro name, not the 'if' token. 3468 CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation()); 3469 else 3470 CurPPLexer->MIOpt.EnterTopLevelConditional(); 3471 } 3472 3473 if (Callbacks) 3474 Callbacks->If( 3475 IfToken.getLocation(), DER.ExprRange, 3476 (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False)); 3477 3478 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks && 3479 getSourceManager().isInMainFile(IfToken.getLocation()); 3480 3481 // Should we include the stuff contained by this directive? 3482 if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) { 3483 // In 'single-file-parse mode' undefined identifiers trigger parsing of all 3484 // the directive blocks. 3485 CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false, 3486 /*foundnonskip*/false, /*foundelse*/false); 3487 } else if (ConditionalTrue || RetainExcludedCB) { 3488 // Yes, remember that we are inside a conditional, then lex the next token. 3489 CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false, 3490 /*foundnonskip*/true, /*foundelse*/false); 3491 } else { 3492 // No, skip the contents of this block. 
3493 SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(), 3494 /*Foundnonskip*/ false, 3495 /*FoundElse*/ false); 3496 } 3497 } 3498 3499 /// HandleEndifDirective - Implements the \#endif directive. 3500 /// 3501 void Preprocessor::HandleEndifDirective(Token &EndifToken) { 3502 ++NumEndif; 3503 3504 // Check that this is the whole directive. 3505 CheckEndOfDirective("endif"); 3506 3507 PPConditionalInfo CondInfo; 3508 if (CurPPLexer->popConditionalLevel(CondInfo)) { 3509 // No conditionals on the stack: this is an #endif without an #if. 3510 Diag(EndifToken, diag::err_pp_endif_without_if); 3511 return; 3512 } 3513 3514 // If this the end of a top-level #endif, inform MIOpt. 3515 if (CurPPLexer->getConditionalStackDepth() == 0) 3516 CurPPLexer->MIOpt.ExitTopLevelConditional(); 3517 3518 assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode && 3519 "This code should only be reachable in the non-skipping case!"); 3520 3521 if (Callbacks) 3522 Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc); 3523 } 3524 3525 /// HandleElseDirective - Implements the \#else directive. 3526 /// 3527 void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) { 3528 ++NumElse; 3529 3530 // #else directive in a non-skipping conditional... start skipping. 3531 CheckEndOfDirective("else"); 3532 3533 PPConditionalInfo CI; 3534 if (CurPPLexer->popConditionalLevel(CI)) { 3535 Diag(Result, diag::pp_err_else_without_if); 3536 return; 3537 } 3538 3539 // If this is a top-level #else, inform the MIOpt. 3540 if (CurPPLexer->getConditionalStackDepth() == 0) 3541 CurPPLexer->MIOpt.EnterTopLevelConditional(); 3542 3543 // If this is a #else with a #else before it, report the error. 
3544 if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else); 3545 3546 if (Callbacks) 3547 Callbacks->Else(Result.getLocation(), CI.IfLoc); 3548 3549 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks && 3550 getSourceManager().isInMainFile(Result.getLocation()); 3551 3552 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) { 3553 // In 'single-file-parse mode' undefined identifiers trigger parsing of all 3554 // the directive blocks. 3555 CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false, 3556 /*foundnonskip*/false, /*foundelse*/true); 3557 return; 3558 } 3559 3560 // Finally, skip the rest of the contents of this block. 3561 SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc, 3562 /*Foundnonskip*/ true, 3563 /*FoundElse*/ true, Result.getLocation()); 3564 } 3565 3566 /// Implements the \#elif, \#elifdef, and \#elifndef directives. 3567 void Preprocessor::HandleElifFamilyDirective(Token &ElifToken, 3568 const Token &HashToken, 3569 tok::PPKeywordKind Kind) { 3570 PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif 3571 : Kind == tok::pp_elifdef ? PED_Elifdef 3572 : PED_Elifndef; 3573 ++NumElse; 3574 3575 // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode. 3576 switch (DirKind) { 3577 case PED_Elifdef: 3578 case PED_Elifndef: 3579 unsigned DiagID; 3580 if (LangOpts.CPlusPlus) 3581 DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive 3582 : diag::ext_cxx23_pp_directive; 3583 else 3584 DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive 3585 : diag::ext_c23_pp_directive; 3586 Diag(ElifToken, DiagID) << DirKind; 3587 break; 3588 default: 3589 break; 3590 } 3591 3592 // #elif directive in a non-skipping conditional... start skipping. 3593 // We don't care what the condition is, because we will always skip it (since 3594 // the block immediately before it was included). 
3595 SourceRange ConditionRange = DiscardUntilEndOfDirective(); 3596 3597 PPConditionalInfo CI; 3598 if (CurPPLexer->popConditionalLevel(CI)) { 3599 Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind; 3600 return; 3601 } 3602 3603 // If this is a top-level #elif, inform the MIOpt. 3604 if (CurPPLexer->getConditionalStackDepth() == 0) 3605 CurPPLexer->MIOpt.EnterTopLevelConditional(); 3606 3607 // If this is a #elif with a #else before it, report the error. 3608 if (CI.FoundElse) 3609 Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind; 3610 3611 if (Callbacks) { 3612 switch (Kind) { 3613 case tok::pp_elif: 3614 Callbacks->Elif(ElifToken.getLocation(), ConditionRange, 3615 PPCallbacks::CVK_NotEvaluated, CI.IfLoc); 3616 break; 3617 case tok::pp_elifdef: 3618 Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc); 3619 break; 3620 case tok::pp_elifndef: 3621 Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc); 3622 break; 3623 default: 3624 assert(false && "unexpected directive kind"); 3625 break; 3626 } 3627 } 3628 3629 bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks && 3630 getSourceManager().isInMainFile(ElifToken.getLocation()); 3631 3632 if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) { 3633 // In 'single-file-parse mode' undefined identifiers trigger parsing of all 3634 // the directive blocks. 3635 CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false, 3636 /*foundnonskip*/false, /*foundelse*/false); 3637 return; 3638 } 3639 3640 // Finally, skip the rest of the contents of this block. 
3641 SkipExcludedConditionalBlock( 3642 HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true, 3643 /*FoundElse*/ CI.FoundElse, ElifToken.getLocation()); 3644 } 3645 3646 std::optional<LexEmbedParametersResult> 3647 Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) { 3648 LexEmbedParametersResult Result{}; 3649 SmallVector<Token, 2> ParameterTokens; 3650 tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod; 3651 3652 auto DiagMismatchedBracesAndSkipToEOD = 3653 [&](tok::TokenKind Expected, 3654 std::pair<tok::TokenKind, SourceLocation> Matches) { 3655 Diag(CurTok, diag::err_expected) << Expected; 3656 Diag(Matches.second, diag::note_matching) << Matches.first; 3657 if (CurTok.isNot(tok::eod)) 3658 DiscardUntilEndOfDirective(CurTok); 3659 }; 3660 3661 auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) { 3662 if (CurTok.isNot(Kind)) { 3663 Diag(CurTok, diag::err_expected) << Kind; 3664 if (CurTok.isNot(tok::eod)) 3665 DiscardUntilEndOfDirective(CurTok); 3666 return false; 3667 } 3668 return true; 3669 }; 3670 3671 // C23 6.10: 3672 // pp-parameter-name: 3673 // pp-standard-parameter 3674 // pp-prefixed-parameter 3675 // 3676 // pp-standard-parameter: 3677 // identifier 3678 // 3679 // pp-prefixed-parameter: 3680 // identifier :: identifier 3681 auto LexPPParameterName = [&]() -> std::optional<std::string> { 3682 // We expect the current token to be an identifier; if it's not, things 3683 // have gone wrong. 3684 if (!ExpectOrDiagAndSkipToEOD(tok::identifier)) 3685 return std::nullopt; 3686 3687 const IdentifierInfo *Prefix = CurTok.getIdentifierInfo(); 3688 3689 // Lex another token; it is either a :: or we're done with the parameter 3690 // name. 3691 LexNonComment(CurTok); 3692 if (CurTok.is(tok::coloncolon)) { 3693 // We found a ::, so lex another identifier token. 
3694 LexNonComment(CurTok); 3695 if (!ExpectOrDiagAndSkipToEOD(tok::identifier)) 3696 return std::nullopt; 3697 3698 const IdentifierInfo *Suffix = CurTok.getIdentifierInfo(); 3699 3700 // Lex another token so we're past the name. 3701 LexNonComment(CurTok); 3702 return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str(); 3703 } 3704 return Prefix->getName().str(); 3705 }; 3706 3707 // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by 3708 // this document as an identifier pp_param and an identifier of the form 3709 // __pp_param__ shall behave the same when used as a preprocessor parameter, 3710 // except for the spelling. 3711 auto NormalizeParameterName = [](StringRef Name) { 3712 if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__")) 3713 return Name.substr(2, Name.size() - 4); 3714 return Name; 3715 }; 3716 3717 auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> { 3718 // we have a limit parameter and its internals are processed using 3719 // evaluation rules from #if. 3720 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren)) 3721 return std::nullopt; 3722 3723 // We do not consume the ( because EvaluateDirectiveExpression will lex 3724 // the next token for us. 3725 IdentifierInfo *ParameterIfNDef = nullptr; 3726 bool EvaluatedDefined; 3727 DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression( 3728 ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false); 3729 3730 if (!LimitEvalResult.Value) { 3731 // If there was an error evaluating the directive expression, we expect 3732 // to be at the end of directive token. 3733 assert(CurTok.is(tok::eod) && "expect to be at the end of directive"); 3734 return std::nullopt; 3735 } 3736 3737 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren)) 3738 return std::nullopt; 3739 3740 // Eat the ). 3741 LexNonComment(CurTok); 3742 3743 // C23 6.10.3.2p2: The token defined shall not appear within the constant 3744 // expression. 
3745 if (EvaluatedDefined) { 3746 Diag(CurTok, diag::err_defined_in_pp_embed); 3747 return std::nullopt; 3748 } 3749 3750 if (LimitEvalResult.Value) { 3751 const llvm::APSInt &Result = *LimitEvalResult.Value; 3752 if (Result.isNegative()) { 3753 Diag(CurTok, diag::err_requires_positive_value) 3754 << toString(Result, 10) << /*positive*/ 0; 3755 return std::nullopt; 3756 } 3757 return Result.getLimitedValue(); 3758 } 3759 return std::nullopt; 3760 }; 3761 3762 auto GetMatchingCloseBracket = [](tok::TokenKind Kind) { 3763 switch (Kind) { 3764 case tok::l_paren: 3765 return tok::r_paren; 3766 case tok::l_brace: 3767 return tok::r_brace; 3768 case tok::l_square: 3769 return tok::r_square; 3770 default: 3771 llvm_unreachable("should not get here"); 3772 } 3773 }; 3774 3775 auto LexParenthesizedBalancedTokenSoup = 3776 [&](llvm::SmallVectorImpl<Token> &Tokens) { 3777 std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack; 3778 3779 // We expect the current token to be a left paren. 3780 if (!ExpectOrDiagAndSkipToEOD(tok::l_paren)) 3781 return false; 3782 LexNonComment(CurTok); // Eat the ( 3783 3784 bool WaitingForInnerCloseParen = false; 3785 while (CurTok.isNot(tok::eod) && 3786 (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) { 3787 switch (CurTok.getKind()) { 3788 default: // Shutting up diagnostics about not fully-covered switch. 
3789 break; 3790 case tok::l_paren: 3791 WaitingForInnerCloseParen = true; 3792 [[fallthrough]]; 3793 case tok::l_brace: 3794 case tok::l_square: 3795 BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()}); 3796 break; 3797 case tok::r_paren: 3798 WaitingForInnerCloseParen = false; 3799 [[fallthrough]]; 3800 case tok::r_brace: 3801 case tok::r_square: { 3802 tok::TokenKind Matching = 3803 GetMatchingCloseBracket(BracketStack.back().first); 3804 if (BracketStack.empty() || CurTok.getKind() != Matching) { 3805 DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back()); 3806 return false; 3807 } 3808 BracketStack.pop_back(); 3809 } break; 3810 } 3811 Tokens.push_back(CurTok); 3812 LexNonComment(CurTok); 3813 } 3814 3815 // When we're done, we want to eat the closing paren. 3816 if (!ExpectOrDiagAndSkipToEOD(tok::r_paren)) 3817 return false; 3818 3819 LexNonComment(CurTok); // Eat the ) 3820 return true; 3821 }; 3822 3823 LexNonComment(CurTok); // Prime the pump. 3824 while (!CurTok.isOneOf(EndTokenKind, tok::eod)) { 3825 SourceLocation ParamStartLoc = CurTok.getLocation(); 3826 std::optional<std::string> ParamName = LexPPParameterName(); 3827 if (!ParamName) 3828 return std::nullopt; 3829 StringRef Parameter = NormalizeParameterName(*ParamName); 3830 3831 // Lex the parameters (dependent on the parameter type we want!). 3832 // 3833 // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or 3834 // one time in the embed parameter sequence. 
3835 if (Parameter == "limit") { 3836 if (Result.MaybeLimitParam) 3837 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3838 3839 std::optional<size_t> Limit = LexParenthesizedIntegerExpr(); 3840 if (!Limit) 3841 return std::nullopt; 3842 Result.MaybeLimitParam = 3843 PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}}; 3844 } else if (Parameter == "clang::offset") { 3845 if (Result.MaybeOffsetParam) 3846 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3847 3848 std::optional<size_t> Offset = LexParenthesizedIntegerExpr(); 3849 if (!Offset) 3850 return std::nullopt; 3851 Result.MaybeOffsetParam = PPEmbedParameterOffset{ 3852 *Offset, {ParamStartLoc, CurTok.getLocation()}}; 3853 } else if (Parameter == "prefix") { 3854 if (Result.MaybePrefixParam) 3855 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3856 3857 SmallVector<Token, 4> Soup; 3858 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3859 return std::nullopt; 3860 Result.MaybePrefixParam = PPEmbedParameterPrefix{ 3861 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; 3862 } else if (Parameter == "suffix") { 3863 if (Result.MaybeSuffixParam) 3864 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3865 3866 SmallVector<Token, 4> Soup; 3867 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3868 return std::nullopt; 3869 Result.MaybeSuffixParam = PPEmbedParameterSuffix{ 3870 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; 3871 } else if (Parameter == "if_empty") { 3872 if (Result.MaybeIfEmptyParam) 3873 Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter; 3874 3875 SmallVector<Token, 4> Soup; 3876 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3877 return std::nullopt; 3878 Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{ 3879 std::move(Soup), {ParamStartLoc, CurTok.getLocation()}}; 3880 } else { 3881 ++Result.UnrecognizedParams; 3882 3883 // If there's a left paren, we need to parse a balanced token sequence 3884 // and just eat those tokens. 
3885 if (CurTok.is(tok::l_paren)) { 3886 SmallVector<Token, 4> Soup; 3887 if (!LexParenthesizedBalancedTokenSoup(Soup)) 3888 return std::nullopt; 3889 } 3890 if (!ForHasEmbed) { 3891 Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter; 3892 return std::nullopt; 3893 } 3894 } 3895 } 3896 return Result; 3897 } 3898 3899 void Preprocessor::HandleEmbedDirectiveImpl( 3900 SourceLocation HashLoc, const LexEmbedParametersResult &Params, 3901 StringRef BinaryContents) { 3902 if (BinaryContents.empty()) { 3903 // If we have no binary contents, the only thing we need to emit are the 3904 // if_empty tokens, if any. 3905 // FIXME: this loses AST fidelity; nothing in the compiler will see that 3906 // these tokens came from #embed. We have to hack around this when printing 3907 // preprocessed output. The same is true for prefix and suffix tokens. 3908 if (Params.MaybeIfEmptyParam) { 3909 ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens; 3910 size_t TokCount = Toks.size(); 3911 auto NewToks = std::make_unique<Token[]>(TokCount); 3912 llvm::copy(Toks, NewToks.get()); 3913 EnterTokenStream(std::move(NewToks), TokCount, true, true); 3914 } 3915 return; 3916 } 3917 3918 size_t NumPrefixToks = Params.PrefixTokenCount(), 3919 NumSuffixToks = Params.SuffixTokenCount(); 3920 size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks; 3921 size_t CurIdx = 0; 3922 auto Toks = std::make_unique<Token[]>(TotalNumToks); 3923 3924 // Add the prefix tokens, if any. 3925 if (Params.MaybePrefixParam) { 3926 llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]); 3927 CurIdx += NumPrefixToks; 3928 } 3929 3930 EmbedAnnotationData *Data = new (BP) EmbedAnnotationData; 3931 Data->BinaryData = BinaryContents; 3932 3933 Toks[CurIdx].startToken(); 3934 Toks[CurIdx].setKind(tok::annot_embed); 3935 Toks[CurIdx].setAnnotationRange(HashLoc); 3936 Toks[CurIdx++].setAnnotationValue(Data); 3937 3938 // Now add the suffix tokens, if any. 
3939 if (Params.MaybeSuffixParam) { 3940 llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]); 3941 CurIdx += NumSuffixToks; 3942 } 3943 3944 assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens"); 3945 EnterTokenStream(std::move(Toks), TotalNumToks, true, true); 3946 } 3947 3948 void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, 3949 const FileEntry *LookupFromFile) { 3950 // Give the usual extension/compatibility warnings. 3951 if (LangOpts.C23) 3952 Diag(EmbedTok, diag::warn_compat_pp_embed_directive); 3953 else 3954 Diag(EmbedTok, diag::ext_pp_embed_directive) 3955 << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0); 3956 3957 // Parse the filename header 3958 Token FilenameTok; 3959 if (LexHeaderName(FilenameTok)) 3960 return; 3961 3962 if (FilenameTok.isNot(tok::header_name)) { 3963 Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); 3964 if (FilenameTok.isNot(tok::eod)) 3965 DiscardUntilEndOfDirective(); 3966 return; 3967 } 3968 3969 // Parse the optional sequence of 3970 // directive-parameters: 3971 // identifier parameter-name-list[opt] directive-argument-list[opt] 3972 // directive-argument-list: 3973 // '(' balanced-token-sequence ')' 3974 // parameter-name-list: 3975 // '::' identifier parameter-name-list[opt] 3976 Token CurTok; 3977 std::optional<LexEmbedParametersResult> Params = 3978 LexEmbedParameters(CurTok, /*ForHasEmbed=*/false); 3979 3980 assert((Params || CurTok.is(tok::eod)) && 3981 "expected success or to be at the end of the directive"); 3982 if (!Params) 3983 return; 3984 3985 // Now, splat the data out! 3986 SmallString<128> FilenameBuffer; 3987 StringRef Filename = getSpelling(FilenameTok, FilenameBuffer); 3988 StringRef OriginalFilename = Filename; 3989 bool isAngled = 3990 GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); 3991 // If GetIncludeFilenameSpelling set the start ptr to null, there was an 3992 // error. 
3993 assert(!Filename.empty()); 3994 OptionalFileEntryRef MaybeFileRef = 3995 this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile); 3996 if (!MaybeFileRef) { 3997 // could not find file 3998 if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) { 3999 return; 4000 } 4001 Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; 4002 return; 4003 } 4004 std::optional<llvm::MemoryBufferRef> MaybeFile = 4005 getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef); 4006 if (!MaybeFile) { 4007 // could not find file 4008 Diag(FilenameTok, diag::err_cannot_open_file) 4009 << Filename << "a buffer to the contents could not be created"; 4010 return; 4011 } 4012 StringRef BinaryContents = MaybeFile->getBuffer(); 4013 4014 // The order is important between 'offset' and 'limit'; we want to offset 4015 // first and then limit second; otherwise we may reduce the notional resource 4016 // size to something too small to offset into. 4017 if (Params->MaybeOffsetParam) { 4018 // FIXME: just like with the limit() and if_empty() parameters, this loses 4019 // source fidelity in the AST; it has no idea that there was an offset 4020 // involved. 4021 // offsets all the way to the end of the file make for an empty file. 4022 BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset); 4023 } 4024 4025 if (Params->MaybeLimitParam) { 4026 // FIXME: just like with the clang::offset() and if_empty() parameters, 4027 // this loses source fidelity in the AST; it has no idea there was a limit 4028 // involved. 4029 BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit); 4030 } 4031 4032 if (Callbacks) 4033 Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef, 4034 *Params); 4035 HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents); 4036 } 4037