1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This code simply runs the preprocessor on the input file and prints out the 10 // result. This is the traditional behavior of the -E option. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Frontend/Utils.h" 15 #include "clang/Basic/CharInfo.h" 16 #include "clang/Basic/Diagnostic.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "clang/Frontend/PreprocessorOutputOptions.h" 19 #include "clang/Lex/MacroInfo.h" 20 #include "clang/Lex/PPCallbacks.h" 21 #include "clang/Lex/Pragma.h" 22 #include "clang/Lex/Preprocessor.h" 23 #include "clang/Lex/TokenConcatenation.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cstdio> 30 using namespace clang; 31 32 /// PrintMacroDefinition - Print a macro definition in a form that will be 33 /// properly accepted back as a definition. 34 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, 35 Preprocessor &PP, raw_ostream *OS) { 36 *OS << "#define " << II.getName(); 37 38 if (MI.isFunctionLike()) { 39 *OS << '('; 40 if (!MI.param_empty()) { 41 MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); 42 for (; AI+1 != E; ++AI) { 43 *OS << (*AI)->getName(); 44 *OS << ','; 45 } 46 47 // Last argument. 48 if ((*AI)->getName() == "__VA_ARGS__") 49 *OS << "..."; 50 else 51 *OS << (*AI)->getName(); 52 } 53 54 if (MI.isGNUVarargs()) 55 *OS << "..."; // #define foo(x...) 56 57 *OS << ')'; 58 } 59 60 // GCC always emits a space, even if the macro body is empty. However, do not 61 // want to emit two spaces if the first token has a leading space. 62 if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace()) 63 *OS << ' '; 64 65 SmallString<128> SpellingBuffer; 66 for (const auto &T : MI.tokens()) { 67 if (T.hasLeadingSpace()) 68 *OS << ' '; 69 70 *OS << PP.getSpelling(T, SpellingBuffer); 71 } 72 } 73 74 //===----------------------------------------------------------------------===// 75 // Preprocessed token printer 76 //===----------------------------------------------------------------------===// 77 78 namespace { 79 class PrintPPOutputPPCallbacks : public PPCallbacks { 80 Preprocessor &PP; 81 SourceManager &SM; 82 TokenConcatenation ConcatInfo; 83 public: 84 raw_ostream *OS; 85 private: 86 unsigned CurLine; 87 88 bool EmittedTokensOnThisLine; 89 bool EmittedDirectiveOnThisLine; 90 SrcMgr::CharacteristicKind FileType; 91 SmallString<512> CurFilename; 92 bool Initialized; 93 bool DisableLineMarkers; 94 bool DumpDefines; 95 bool DumpIncludeDirectives; 96 bool UseLineDirectives; 97 bool IsFirstFileEntered; 98 bool MinimizeWhitespace; 99 bool DirectivesOnly; 100 bool KeepSystemIncludes; 101 raw_ostream *OrigOS; 102 std::unique_ptr<llvm::raw_null_ostream> NullOS; 103 104 Token PrevTok; 105 Token PrevPrevTok; 106 107 public: 108 PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, 109 bool defines, bool DumpIncludeDirectives, 110 bool UseLineDirectives, bool MinimizeWhitespace, 111 bool DirectivesOnly, bool KeepSystemIncludes) 112 : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), 113 DisableLineMarkers(lineMarkers), DumpDefines(defines), 114 DumpIncludeDirectives(DumpIncludeDirectives), 115 UseLineDirectives(UseLineDirectives), 116 MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly), 117 KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) { 118 CurLine = 0; 119 CurFilename += "<uninit>"; 120 EmittedTokensOnThisLine = false; 121 EmittedDirectiveOnThisLine = false; 122 FileType = SrcMgr::C_User; 123 Initialized = false; 124 IsFirstFileEntered = false; 125 if (KeepSystemIncludes) 126 NullOS = std::make_unique<llvm::raw_null_ostream>(); 127 128 PrevTok.startToken(); 129 PrevPrevTok.startToken(); 130 } 131 132 bool isMinimizeWhitespace() const { return MinimizeWhitespace; } 133 134 void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } 135 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; } 136 137 void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; } 138 bool hasEmittedDirectiveOnThisLine() const { 139 return EmittedDirectiveOnThisLine; 140 } 141 142 /// Ensure that the output stream position is at the beginning of a new line 143 /// and inserts one if it does not. It is intended to ensure that directives 144 /// inserted by the directives not from the input source (such as #line) are 145 /// in the first column. To insert newlines that represent the input, use 146 /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true). 147 void startNewLineIfNeeded(); 148 149 void FileChanged(SourceLocation Loc, FileChangeReason Reason, 150 SrcMgr::CharacteristicKind FileType, 151 FileID PrevFID) override; 152 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 153 StringRef FileName, bool IsAngled, 154 CharSourceRange FilenameRange, 155 OptionalFileEntryRef File, StringRef SearchPath, 156 StringRef RelativePath, const Module *SuggestedModule, 157 bool ModuleImported, 158 SrcMgr::CharacteristicKind FileType) override; 159 void Ident(SourceLocation Loc, StringRef str) override; 160 void PragmaMessage(SourceLocation Loc, StringRef Namespace, 161 PragmaMessageKind Kind, StringRef Str) override; 162 void PragmaDebug(SourceLocation Loc, StringRef DebugType) override; 163 void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override; 164 void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override; 165 void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace, 166 diag::Severity Map, StringRef Str) override; 167 void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec, 168 ArrayRef<int> Ids) override; 169 void PragmaWarningPush(SourceLocation Loc, int Level) override; 170 void PragmaWarningPop(SourceLocation Loc) override; 171 void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override; 172 void PragmaExecCharsetPop(SourceLocation Loc) override; 173 void PragmaAssumeNonNullBegin(SourceLocation Loc) override; 174 void PragmaAssumeNonNullEnd(SourceLocation Loc) override; 175 176 /// Insert whitespace before emitting the next token. 177 /// 178 /// @param Tok Next token to be emitted. 179 /// @param RequireSpace Ensure at least one whitespace is emitted. Useful 180 /// if non-tokens have been emitted to the stream. 181 /// @param RequireSameLine Never emit newlines. Useful when semantics depend 182 /// on being on the same line, such as directives. 183 void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace, 184 bool RequireSameLine); 185 186 /// Move to the line of the provided source location. This will 187 /// return true if a newline was inserted or if 188 /// the requested location is the first token on the first line. 189 /// In these cases the next output will be the first column on the line and 190 /// make it possible to insert indention. The newline was inserted 191 /// implicitly when at the beginning of the file. 192 /// 193 /// @param Tok Token where to move to. 194 /// @param RequireStartOfLine Whether the next line depends on being in the 195 /// first column, such as a directive. 196 /// 197 /// @return Whether column adjustments are necessary. 198 bool MoveToLine(const Token &Tok, bool RequireStartOfLine) { 199 PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation()); 200 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; 201 bool IsFirstInFile = 202 Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1; 203 return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile; 204 } 205 206 /// Move to the line of the provided source location. Returns true if a new 207 /// line was inserted. 208 bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) { 209 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 210 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; 211 return MoveToLine(TargetLine, RequireStartOfLine); 212 } 213 bool MoveToLine(unsigned LineNo, bool RequireStartOfLine); 214 215 bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, 216 const Token &Tok) { 217 return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok); 218 } 219 void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr, 220 unsigned ExtraLen=0); 221 bool LineMarkersAreDisabled() const { return DisableLineMarkers; } 222 void HandleNewlinesInToken(const char *TokStr, unsigned Len); 223 224 /// MacroDefined - This hook is called whenever a macro definition is seen. 225 void MacroDefined(const Token &MacroNameTok, 226 const MacroDirective *MD) override; 227 228 /// MacroUndefined - This hook is called whenever a macro #undef is seen. 229 void MacroUndefined(const Token &MacroNameTok, 230 const MacroDefinition &MD, 231 const MacroDirective *Undef) override; 232 233 void BeginModule(const Module *M); 234 void EndModule(const Module *M); 235 }; 236 } // end anonymous namespace 237 238 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, 239 const char *Extra, 240 unsigned ExtraLen) { 241 startNewLineIfNeeded(); 242 243 // Emit #line directives or GNU line markers depending on what mode we're in. 244 if (UseLineDirectives) { 245 *OS << "#line" << ' ' << LineNo << ' ' << '"'; 246 OS->write_escaped(CurFilename); 247 *OS << '"'; 248 } else { 249 *OS << '#' << ' ' << LineNo << ' ' << '"'; 250 OS->write_escaped(CurFilename); 251 *OS << '"'; 252 253 if (ExtraLen) 254 OS->write(Extra, ExtraLen); 255 256 if (FileType == SrcMgr::C_System) 257 OS->write(" 3", 2); 258 else if (FileType == SrcMgr::C_ExternCSystem) 259 OS->write(" 3 4", 4); 260 } 261 *OS << '\n'; 262 } 263 264 /// MoveToLine - Move the output to the source line specified by the location 265 /// object. We can do this by emitting some number of \n's, or be emitting a 266 /// #line directive. This returns false if already at the specified line, true 267 /// if some newlines were emitted. 268 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo, 269 bool RequireStartOfLine) { 270 // If it is required to start a new line or finish the current, insert 271 // vertical whitespace now and take it into account when moving to the 272 // expected line. 273 bool StartedNewLine = false; 274 if ((RequireStartOfLine && EmittedTokensOnThisLine) || 275 EmittedDirectiveOnThisLine) { 276 *OS << '\n'; 277 StartedNewLine = true; 278 CurLine += 1; 279 EmittedTokensOnThisLine = false; 280 EmittedDirectiveOnThisLine = false; 281 } 282 283 // If this line is "close enough" to the original line, just print newlines, 284 // otherwise print a #line directive. 285 if (CurLine == LineNo) { 286 // Nothing to do if we are already on the correct line. 287 } else if (MinimizeWhitespace && DisableLineMarkers) { 288 // With -E -P -fminimize-whitespace, don't emit anything if not necessary. 289 } else if (!StartedNewLine && LineNo - CurLine == 1) { 290 // Printing a single line has priority over printing a #line directive, even 291 // when minimizing whitespace which otherwise would print #line directives 292 // for every single line. 293 *OS << '\n'; 294 StartedNewLine = true; 295 } else if (!DisableLineMarkers) { 296 if (LineNo - CurLine <= 8) { 297 const char *NewLines = "\n\n\n\n\n\n\n\n"; 298 OS->write(NewLines, LineNo - CurLine); 299 } else { 300 // Emit a #line or line marker. 301 WriteLineInfo(LineNo, nullptr, 0); 302 } 303 StartedNewLine = true; 304 } else if (EmittedTokensOnThisLine) { 305 // If we are not on the correct line and don't need to be line-correct, 306 // at least ensure we start on a new line. 307 *OS << '\n'; 308 StartedNewLine = true; 309 } 310 311 if (StartedNewLine) { 312 EmittedTokensOnThisLine = false; 313 EmittedDirectiveOnThisLine = false; 314 } 315 316 CurLine = LineNo; 317 return StartedNewLine; 318 } 319 320 void PrintPPOutputPPCallbacks::startNewLineIfNeeded() { 321 if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) { 322 *OS << '\n'; 323 EmittedTokensOnThisLine = false; 324 EmittedDirectiveOnThisLine = false; 325 } 326 } 327 328 /// FileChanged - Whenever the preprocessor enters or exits a #include file 329 /// it invokes this handler. Update our conception of the current source 330 /// position. 331 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, 332 FileChangeReason Reason, 333 SrcMgr::CharacteristicKind NewFileType, 334 FileID PrevFID) { 335 // Unless we are exiting a #include, make sure to skip ahead to the line the 336 // #include directive was at. 337 SourceManager &SourceMgr = SM; 338 339 PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc); 340 if (UserLoc.isInvalid()) 341 return; 342 343 unsigned NewLine = UserLoc.getLine(); 344 345 if (Reason == PPCallbacks::EnterFile) { 346 SourceLocation IncludeLoc = UserLoc.getIncludeLoc(); 347 if (IncludeLoc.isValid()) 348 MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false); 349 } else if (Reason == PPCallbacks::SystemHeaderPragma) { 350 // GCC emits the # directive for this directive on the line AFTER the 351 // directive and emits a bunch of spaces that aren't needed. This is because 352 // otherwise we will emit a line marker for THIS line, which requires an 353 // extra blank line after the directive to avoid making all following lines 354 // off by one. We can do better by simply incrementing NewLine here. 355 NewLine += 1; 356 } 357 358 CurLine = NewLine; 359 360 // In KeepSystemIncludes mode, redirect OS as needed. 361 if (KeepSystemIncludes && (isSystem(FileType) != isSystem(NewFileType))) 362 OS = isSystem(FileType) ? OrigOS : NullOS.get(); 363 364 CurFilename.clear(); 365 CurFilename += UserLoc.getFilename(); 366 FileType = NewFileType; 367 368 if (DisableLineMarkers) { 369 if (!MinimizeWhitespace) 370 startNewLineIfNeeded(); 371 return; 372 } 373 374 if (!Initialized) { 375 WriteLineInfo(CurLine); 376 Initialized = true; 377 } 378 379 // Do not emit an enter marker for the main file (which we expect is the first 380 // entered file). This matches gcc, and improves compatibility with some tools 381 // which track the # line markers as a way to determine when the preprocessed 382 // output is in the context of the main file. 383 if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) { 384 IsFirstFileEntered = true; 385 return; 386 } 387 388 switch (Reason) { 389 case PPCallbacks::EnterFile: 390 WriteLineInfo(CurLine, " 1", 2); 391 break; 392 case PPCallbacks::ExitFile: 393 WriteLineInfo(CurLine, " 2", 2); 394 break; 395 case PPCallbacks::SystemHeaderPragma: 396 case PPCallbacks::RenameFile: 397 WriteLineInfo(CurLine); 398 break; 399 } 400 } 401 402 void PrintPPOutputPPCallbacks::InclusionDirective( 403 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, 404 bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, 405 StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule, 406 bool ModuleImported, SrcMgr::CharacteristicKind FileType) { 407 // In -dI mode, dump #include directives prior to dumping their content or 408 // interpretation. Similar for -fkeep-system-includes. 409 if (DumpIncludeDirectives || (KeepSystemIncludes && isSystem(FileType))) { 410 MoveToLine(HashLoc, /*RequireStartOfLine=*/true); 411 const std::string TokenText = PP.getSpelling(IncludeTok); 412 assert(!TokenText.empty()); 413 *OS << "#" << TokenText << " " 414 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') 415 << " /* clang -E " 416 << (DumpIncludeDirectives ? "-dI" : "-fkeep-system-includes") 417 << " */"; 418 setEmittedDirectiveOnThisLine(); 419 } 420 421 // When preprocessing, turn implicit imports into module import pragmas. 422 if (ModuleImported) { 423 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) { 424 case tok::pp_include: 425 case tok::pp_import: 426 case tok::pp_include_next: 427 MoveToLine(HashLoc, /*RequireStartOfLine=*/true); 428 *OS << "#pragma clang module import " 429 << SuggestedModule->getFullModuleName(true) 430 << " /* clang -E: implicit import for " 431 << "#" << PP.getSpelling(IncludeTok) << " " 432 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') 433 << " */"; 434 setEmittedDirectiveOnThisLine(); 435 break; 436 437 case tok::pp___include_macros: 438 // #__include_macros has no effect on a user of a preprocessed source 439 // file; the only effect is on preprocessing. 440 // 441 // FIXME: That's not *quite* true: it causes the module in question to 442 // be loaded, which can affect downstream diagnostics. 443 break; 444 445 default: 446 llvm_unreachable("unknown include directive kind"); 447 break; 448 } 449 } 450 } 451 452 /// Handle entering the scope of a module during a module compilation. 453 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) { 454 startNewLineIfNeeded(); 455 *OS << "#pragma clang module begin " << M->getFullModuleName(true); 456 setEmittedDirectiveOnThisLine(); 457 } 458 459 /// Handle leaving the scope of a module during a module compilation. 460 void PrintPPOutputPPCallbacks::EndModule(const Module *M) { 461 startNewLineIfNeeded(); 462 *OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/"; 463 setEmittedDirectiveOnThisLine(); 464 } 465 466 /// Ident - Handle #ident directives when read by the preprocessor. 467 /// 468 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) { 469 MoveToLine(Loc, /*RequireStartOfLine=*/true); 470 471 OS->write("#ident ", strlen("#ident ")); 472 OS->write(S.begin(), S.size()); 473 setEmittedTokensOnThisLine(); 474 } 475 476 /// MacroDefined - This hook is called whenever a macro definition is seen. 477 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok, 478 const MacroDirective *MD) { 479 const MacroInfo *MI = MD->getMacroInfo(); 480 // Print out macro definitions in -dD mode and when we have -fdirectives-only 481 // for C++20 header units. 482 if ((!DumpDefines && !DirectivesOnly) || 483 // Ignore __FILE__ etc. 484 MI->isBuiltinMacro()) 485 return; 486 487 SourceLocation DefLoc = MI->getDefinitionLoc(); 488 if (DirectivesOnly && !MI->isUsed()) { 489 SourceManager &SM = PP.getSourceManager(); 490 if (SM.isWrittenInBuiltinFile(DefLoc) || 491 SM.isWrittenInCommandLineFile(DefLoc)) 492 return; 493 } 494 MoveToLine(DefLoc, /*RequireStartOfLine=*/true); 495 PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS); 496 setEmittedDirectiveOnThisLine(); 497 } 498 499 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok, 500 const MacroDefinition &MD, 501 const MacroDirective *Undef) { 502 // Print out macro definitions in -dD mode and when we have -fdirectives-only 503 // for C++20 header units. 504 if (!DumpDefines && !DirectivesOnly) 505 return; 506 507 MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true); 508 *OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName(); 509 setEmittedDirectiveOnThisLine(); 510 } 511 512 static void outputPrintable(raw_ostream *OS, StringRef Str) { 513 for (unsigned char Char : Str) { 514 if (isPrintable(Char) && Char != '\\' && Char != '"') 515 *OS << (char)Char; 516 else // Output anything hard as an octal escape. 517 *OS << '\\' 518 << (char)('0' + ((Char >> 6) & 7)) 519 << (char)('0' + ((Char >> 3) & 7)) 520 << (char)('0' + ((Char >> 0) & 7)); 521 } 522 } 523 524 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, 525 StringRef Namespace, 526 PragmaMessageKind Kind, 527 StringRef Str) { 528 MoveToLine(Loc, /*RequireStartOfLine=*/true); 529 *OS << "#pragma "; 530 if (!Namespace.empty()) 531 *OS << Namespace << ' '; 532 switch (Kind) { 533 case PMK_Message: 534 *OS << "message(\""; 535 break; 536 case PMK_Warning: 537 *OS << "warning \""; 538 break; 539 case PMK_Error: 540 *OS << "error \""; 541 break; 542 } 543 544 outputPrintable(OS, Str); 545 *OS << '"'; 546 if (Kind == PMK_Message) 547 *OS << ')'; 548 setEmittedDirectiveOnThisLine(); 549 } 550 551 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc, 552 StringRef DebugType) { 553 MoveToLine(Loc, /*RequireStartOfLine=*/true); 554 555 *OS << "#pragma clang __debug "; 556 *OS << DebugType; 557 558 setEmittedDirectiveOnThisLine(); 559 } 560 561 void PrintPPOutputPPCallbacks:: 562 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) { 563 MoveToLine(Loc, /*RequireStartOfLine=*/true); 564 *OS << "#pragma " << Namespace << " diagnostic push"; 565 setEmittedDirectiveOnThisLine(); 566 } 567 568 void PrintPPOutputPPCallbacks:: 569 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) { 570 MoveToLine(Loc, /*RequireStartOfLine=*/true); 571 *OS << "#pragma " << Namespace << " diagnostic pop"; 572 setEmittedDirectiveOnThisLine(); 573 } 574 575 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc, 576 StringRef Namespace, 577 diag::Severity Map, 578 StringRef Str) { 579 MoveToLine(Loc, /*RequireStartOfLine=*/true); 580 *OS << "#pragma " << Namespace << " diagnostic "; 581 switch (Map) { 582 case diag::Severity::Remark: 583 *OS << "remark"; 584 break; 585 case diag::Severity::Warning: 586 *OS << "warning"; 587 break; 588 case diag::Severity::Error: 589 *OS << "error"; 590 break; 591 case diag::Severity::Ignored: 592 *OS << "ignored"; 593 break; 594 case diag::Severity::Fatal: 595 *OS << "fatal"; 596 break; 597 } 598 *OS << " \"" << Str << '"'; 599 setEmittedDirectiveOnThisLine(); 600 } 601 602 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc, 603 PragmaWarningSpecifier WarningSpec, 604 ArrayRef<int> Ids) { 605 MoveToLine(Loc, /*RequireStartOfLine=*/true); 606 607 *OS << "#pragma warning("; 608 switch(WarningSpec) { 609 case PWS_Default: *OS << "default"; break; 610 case PWS_Disable: *OS << "disable"; break; 611 case PWS_Error: *OS << "error"; break; 612 case PWS_Once: *OS << "once"; break; 613 case PWS_Suppress: *OS << "suppress"; break; 614 case PWS_Level1: *OS << '1'; break; 615 case PWS_Level2: *OS << '2'; break; 616 case PWS_Level3: *OS << '3'; break; 617 case PWS_Level4: *OS << '4'; break; 618 } 619 *OS << ':'; 620 621 for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I) 622 *OS << ' ' << *I; 623 *OS << ')'; 624 setEmittedDirectiveOnThisLine(); 625 } 626 627 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc, 628 int Level) { 629 MoveToLine(Loc, /*RequireStartOfLine=*/true); 630 *OS << "#pragma warning(push"; 631 if (Level >= 0) 632 *OS << ", " << Level; 633 *OS << ')'; 634 setEmittedDirectiveOnThisLine(); 635 } 636 637 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) { 638 MoveToLine(Loc, /*RequireStartOfLine=*/true); 639 *OS << "#pragma warning(pop)"; 640 setEmittedDirectiveOnThisLine(); 641 } 642 643 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc, 644 StringRef Str) { 645 MoveToLine(Loc, /*RequireStartOfLine=*/true); 646 *OS << "#pragma character_execution_set(push"; 647 if (!Str.empty()) 648 *OS << ", " << Str; 649 *OS << ')'; 650 setEmittedDirectiveOnThisLine(); 651 } 652 653 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) { 654 MoveToLine(Loc, /*RequireStartOfLine=*/true); 655 *OS << "#pragma character_execution_set(pop)"; 656 setEmittedDirectiveOnThisLine(); 657 } 658 659 void PrintPPOutputPPCallbacks:: 660 PragmaAssumeNonNullBegin(SourceLocation Loc) { 661 MoveToLine(Loc, /*RequireStartOfLine=*/true); 662 *OS << "#pragma clang assume_nonnull begin"; 663 setEmittedDirectiveOnThisLine(); 664 } 665 666 void PrintPPOutputPPCallbacks:: 667 PragmaAssumeNonNullEnd(SourceLocation Loc) { 668 MoveToLine(Loc, /*RequireStartOfLine=*/true); 669 *OS << "#pragma clang assume_nonnull end"; 670 setEmittedDirectiveOnThisLine(); 671 } 672 673 void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, 674 bool RequireSpace, 675 bool RequireSameLine) { 676 // These tokens are not expanded to anything and don't need whitespace before 677 // them. 678 if (Tok.is(tok::eof) || 679 (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && 680 !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && 681 !Tok.is(tok::annot_repl_input_end))) 682 return; 683 684 // EmittedDirectiveOnThisLine takes priority over RequireSameLine. 685 if ((!RequireSameLine || EmittedDirectiveOnThisLine) && 686 MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) { 687 if (MinimizeWhitespace) { 688 // Avoid interpreting hash as a directive under -fpreprocessed. 689 if (Tok.is(tok::hash)) 690 *OS << ' '; 691 } else { 692 // Print out space characters so that the first token on a line is 693 // indented for easy reading. 694 unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); 695 696 // The first token on a line can have a column number of 1, yet still 697 // expect leading white space, if a macro expansion in column 1 starts 698 // with an empty macro argument, or an empty nested macro expansion. In 699 // this case, move the token to column 2. 700 if (ColNo == 1 && Tok.hasLeadingSpace()) 701 ColNo = 2; 702 703 // This hack prevents stuff like: 704 // #define HASH # 705 // HASH define foo bar 706 // From having the # character end up at column 1, which makes it so it 707 // is not handled as a #define next time through the preprocessor if in 708 // -fpreprocessed mode. 709 if (ColNo <= 1 && Tok.is(tok::hash)) 710 *OS << ' '; 711 712 // Otherwise, indent the appropriate number of spaces. 713 for (; ColNo > 1; --ColNo) 714 *OS << ' '; 715 } 716 } else { 717 // Insert whitespace between the previous and next token if either 718 // - The caller requires it 719 // - The input had whitespace between them and we are not in 720 // whitespace-minimization mode 721 // - The whitespace is necessary to keep the tokens apart and there is not 722 // already a newline between them 723 if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) || 724 ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) && 725 AvoidConcat(PrevPrevTok, PrevTok, Tok))) 726 *OS << ' '; 727 } 728 729 PrevPrevTok = PrevTok; 730 PrevTok = Tok; 731 } 732 733 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr, 734 unsigned Len) { 735 unsigned NumNewlines = 0; 736 for (; Len; --Len, ++TokStr) { 737 if (*TokStr != '\n' && 738 *TokStr != '\r') 739 continue; 740 741 ++NumNewlines; 742 743 // If we have \n\r or \r\n, skip both and count as one line. 744 if (Len != 1 && 745 (TokStr[1] == '\n' || TokStr[1] == '\r') && 746 TokStr[0] != TokStr[1]) { 747 ++TokStr; 748 --Len; 749 } 750 } 751 752 if (NumNewlines == 0) return; 753 754 CurLine += NumNewlines; 755 } 756 757 758 namespace { 759 struct UnknownPragmaHandler : public PragmaHandler { 760 const char *Prefix; 761 PrintPPOutputPPCallbacks *Callbacks; 762 763 // Set to true if tokens should be expanded 764 bool ShouldExpandTokens; 765 766 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks, 767 bool RequireTokenExpansion) 768 : Prefix(prefix), Callbacks(callbacks), 769 ShouldExpandTokens(RequireTokenExpansion) {} 770 void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, 771 Token &PragmaTok) override { 772 // Figure out what line we went to and insert the appropriate number of 773 // newline characters. 774 Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true); 775 Callbacks->OS->write(Prefix, strlen(Prefix)); 776 Callbacks->setEmittedTokensOnThisLine(); 777 778 if (ShouldExpandTokens) { 779 // The first token does not have expanded macros. Expand them, if 780 // required. 781 auto Toks = std::make_unique<Token[]>(1); 782 Toks[0] = PragmaTok; 783 PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1, 784 /*DisableMacroExpansion=*/false, 785 /*IsReinject=*/false); 786 PP.Lex(PragmaTok); 787 } 788 789 // Read and print all of the pragma tokens. 790 bool IsFirst = true; 791 while (PragmaTok.isNot(tok::eod)) { 792 Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst, 793 /*RequireSameLine=*/true); 794 IsFirst = false; 795 std::string TokSpell = PP.getSpelling(PragmaTok); 796 Callbacks->OS->write(&TokSpell[0], TokSpell.size()); 797 Callbacks->setEmittedTokensOnThisLine(); 798 799 if (ShouldExpandTokens) 800 PP.Lex(PragmaTok); 801 else 802 PP.LexUnexpandedToken(PragmaTok); 803 } 804 Callbacks->setEmittedDirectiveOnThisLine(); 805 } 806 }; 807 } // end anonymous namespace 808 809 810 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, 811 PrintPPOutputPPCallbacks *Callbacks) { 812 bool DropComments = PP.getLangOpts().TraditionalCPP && 813 !PP.getCommentRetentionState(); 814 815 bool IsStartOfLine = false; 816 char Buffer[256]; 817 while (true) { 818 // Two lines joined with line continuation ('\' as last character on the 819 // line) must be emitted as one line even though Tok.getLine() returns two 820 // different values. In this situation Tok.isAtStartOfLine() is false even 821 // though it may be the first token on the lexical line. When 822 // dropping/skipping a token that is at the start of a line, propagate the 823 // start-of-line-ness to the next token to not append it to the previous 824 // line. 825 IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine(); 826 827 Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false, 828 /*RequireSameLine=*/!IsStartOfLine); 829 830 if (DropComments && Tok.is(tok::comment)) { 831 // Skip comments. Normally the preprocessor does not generate 832 // tok::comment nodes at all when not keeping comments, but under 833 // -traditional-cpp the lexer keeps /all/ whitespace, including comments. 834 PP.Lex(Tok); 835 continue; 836 } else if (Tok.is(tok::annot_repl_input_end)) { 837 PP.Lex(Tok); 838 continue; 839 } else if (Tok.is(tok::eod)) { 840 // Don't print end of directive tokens, since they are typically newlines 841 // that mess up our line tracking. These come from unknown pre-processor 842 // directives or hash-prefixed comments in standalone assembly files. 843 PP.Lex(Tok); 844 // FIXME: The token on the next line after #include should have 845 // Tok.isAtStartOfLine() set. 846 IsStartOfLine = true; 847 continue; 848 } else if (Tok.is(tok::annot_module_include)) { 849 // PrintPPOutputPPCallbacks::InclusionDirective handles producing 850 // appropriate output here. Ignore this token entirely. 851 PP.Lex(Tok); 852 IsStartOfLine = true; 853 continue; 854 } else if (Tok.is(tok::annot_module_begin)) { 855 // FIXME: We retrieve this token after the FileChanged callback, and 856 // retrieve the module_end token before the FileChanged callback, so 857 // we render this within the file and render the module end outside the 858 // file, but this is backwards from the token locations: the module_begin 859 // token is at the include location (outside the file) and the module_end 860 // token is at the EOF location (within the file). 861 Callbacks->BeginModule( 862 reinterpret_cast<Module *>(Tok.getAnnotationValue())); 863 PP.Lex(Tok); 864 IsStartOfLine = true; 865 continue; 866 } else if (Tok.is(tok::annot_module_end)) { 867 Callbacks->EndModule( 868 reinterpret_cast<Module *>(Tok.getAnnotationValue())); 869 PP.Lex(Tok); 870 IsStartOfLine = true; 871 continue; 872 } else if (Tok.is(tok::annot_header_unit)) { 873 // This is a header-name that has been (effectively) converted into a 874 // module-name. 875 // FIXME: The module name could contain non-identifier module name 876 // components. We don't have a good way to round-trip those. 877 Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue()); 878 std::string Name = M->getFullModuleName(); 879 Callbacks->OS->write(Name.data(), Name.size()); 880 Callbacks->HandleNewlinesInToken(Name.data(), Name.size()); 881 } else if (Tok.isAnnotation()) { 882 // Ignore annotation tokens created by pragmas - the pragmas themselves 883 // will be reproduced in the preprocessed output. 884 PP.Lex(Tok); 885 continue; 886 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) { 887 *Callbacks->OS << II->getName(); 888 } else if (Tok.isLiteral() && !Tok.needsCleaning() && 889 Tok.getLiteralData()) { 890 Callbacks->OS->write(Tok.getLiteralData(), Tok.getLength()); 891 } else if (Tok.getLength() < std::size(Buffer)) { 892 const char *TokPtr = Buffer; 893 unsigned Len = PP.getSpelling(Tok, TokPtr); 894 Callbacks->OS->write(TokPtr, Len); 895 896 // Tokens that can contain embedded newlines need to adjust our current 897 // line number. 898 // FIXME: The token may end with a newline in which case 899 // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is 900 // wrong. 901 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) 902 Callbacks->HandleNewlinesInToken(TokPtr, Len); 903 if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' && 904 TokPtr[1] == '/') { 905 // It's a line comment; 906 // Ensure that we don't concatenate anything behind it. 907 Callbacks->setEmittedDirectiveOnThisLine(); 908 } 909 } else { 910 std::string S = PP.getSpelling(Tok); 911 Callbacks->OS->write(S.data(), S.size()); 912 913 // Tokens that can contain embedded newlines need to adjust our current 914 // line number. 915 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) 916 Callbacks->HandleNewlinesInToken(S.data(), S.size()); 917 if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') { 918 // It's a line comment; 919 // Ensure that we don't concatenate anything behind it. 920 Callbacks->setEmittedDirectiveOnThisLine(); 921 } 922 } 923 Callbacks->setEmittedTokensOnThisLine(); 924 IsStartOfLine = false; 925 926 if (Tok.is(tok::eof)) break; 927 928 PP.Lex(Tok); 929 } 930 } 931 932 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair; 933 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) { 934 return LHS->first->getName().compare(RHS->first->getName()); 935 } 936 937 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) { 938 // Ignore unknown pragmas. 939 PP.IgnorePragmas(); 940 941 // -dM mode just scans and ignores all tokens in the files, then dumps out 942 // the macro table at the end. 943 PP.EnterMainSourceFile(); 944 945 Token Tok; 946 do PP.Lex(Tok); 947 while (Tok.isNot(tok::eof)); 948 949 SmallVector<id_macro_pair, 128> MacrosByID; 950 for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end(); 951 I != E; ++I) { 952 auto *MD = I->second.getLatest(); 953 if (MD && MD->isDefined()) 954 MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo())); 955 } 956 llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare); 957 958 for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) { 959 MacroInfo &MI = *MacrosByID[i].second; 960 // Ignore computed macros like __LINE__ and friends. 961 if (MI.isBuiltinMacro()) continue; 962 963 PrintMacroDefinition(*MacrosByID[i].first, MI, PP, OS); 964 *OS << '\n'; 965 } 966 } 967 968 /// DoPrintPreprocessedInput - This implements -E mode. 969 /// 970 void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, 971 const PreprocessorOutputOptions &Opts) { 972 // Show macros with no output is handled specially. 973 if (!Opts.ShowCPP) { 974 assert(Opts.ShowMacros && "Not yet implemented!"); 975 DoPrintMacros(PP, OS); 976 return; 977 } 978 979 // Inform the preprocessor whether we want it to retain comments or not, due 980 // to -C or -CC. 981 PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments); 982 983 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( 984 PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros, 985 Opts.ShowIncludeDirectives, Opts.UseLineDirectives, 986 Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes); 987 988 // Expand macros in pragmas with -fms-extensions. The assumption is that 989 // the majority of pragmas in such a file will be Microsoft pragmas. 990 // Remember the handlers we will add so that we can remove them later. 991 std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler( 992 new UnknownPragmaHandler( 993 "#pragma", Callbacks, 994 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt)); 995 996 std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler( 997 "#pragma GCC", Callbacks, 998 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt)); 999 1000 std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler( 1001 "#pragma clang", Callbacks, 1002 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt)); 1003 1004 PP.AddPragmaHandler(MicrosoftExtHandler.get()); 1005 PP.AddPragmaHandler("GCC", GCCHandler.get()); 1006 PP.AddPragmaHandler("clang", ClangHandler.get()); 1007 1008 // The tokens after pragma omp need to be expanded. 1009 // 1010 // OpenMP [2.1, Directive format] 1011 // Preprocessing tokens following the #pragma omp are subject to macro 1012 // replacement. 1013 std::unique_ptr<UnknownPragmaHandler> OpenMPHandler( 1014 new UnknownPragmaHandler("#pragma omp", Callbacks, 1015 /*RequireTokenExpansion=*/true)); 1016 PP.AddPragmaHandler("omp", OpenMPHandler.get()); 1017 1018 PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks)); 1019 1020 // After we have configured the preprocessor, enter the main file. 1021 PP.EnterMainSourceFile(); 1022 if (Opts.DirectivesOnly) 1023 PP.SetMacroExpansionOnlyInDirectives(); 1024 1025 // Consume all of the tokens that come from the predefines buffer. Those 1026 // should not be emitted into the output and are guaranteed to be at the 1027 // start. 1028 const SourceManager &SourceMgr = PP.getSourceManager(); 1029 Token Tok; 1030 do { 1031 PP.Lex(Tok); 1032 if (Tok.is(tok::eof) || !Tok.getLocation().isFileID()) 1033 break; 1034 1035 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation()); 1036 if (PLoc.isInvalid()) 1037 break; 1038 1039 if (strcmp(PLoc.getFilename(), "<built-in>")) 1040 break; 1041 } while (true); 1042 1043 // Read all the preprocessed tokens, printing them out to the stream. 1044 PrintPreprocessedTokens(PP, Tok, Callbacks); 1045 *OS << '\n'; 1046 1047 // Remove the handlers we just added to leave the preprocessor in a sane state 1048 // so that it can be reused (for example by a clang::Parser instance). 1049 PP.RemovePragmaHandler(MicrosoftExtHandler.get()); 1050 PP.RemovePragmaHandler("GCC", GCCHandler.get()); 1051 PP.RemovePragmaHandler("clang", ClangHandler.get()); 1052 PP.RemovePragmaHandler("omp", OpenMPHandler.get()); 1053 } 1054