10b57cec5SDimitry Andric //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This code rewrites include invocations into their expansions. This gives you 100b57cec5SDimitry Andric // a file with all included files merged into it. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "clang/Rewrite/Frontend/Rewriters.h" 150b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h" 160b57cec5SDimitry Andric #include "clang/Frontend/PreprocessorOutputOptions.h" 170b57cec5SDimitry Andric #include "clang/Lex/Pragma.h" 180b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h" 190b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h" 200b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 21bdd1243dSDimitry Andric #include <optional> 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric using namespace clang; 240b57cec5SDimitry Andric using namespace llvm; 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric namespace { 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric class InclusionRewriter : public PPCallbacks { 290b57cec5SDimitry Andric /// Information about which #includes were actually performed, 300b57cec5SDimitry Andric /// created by preprocessor callbacks. 310b57cec5SDimitry Andric struct IncludedFile { 320b57cec5SDimitry Andric FileID Id; 330b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType; 3404eeddc0SDimitry Andric IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType) 3504eeddc0SDimitry Andric : Id(Id), FileType(FileType) {} 360b57cec5SDimitry Andric }; 370b57cec5SDimitry Andric Preprocessor &PP; ///< Used to find inclusion directives. 380b57cec5SDimitry Andric SourceManager &SM; ///< Used to read and manage source files. 390b57cec5SDimitry Andric raw_ostream &OS; ///< The destination stream for rewritten contents. 400b57cec5SDimitry Andric StringRef MainEOL; ///< The line ending marker to use. 41e8d8bef9SDimitry Andric llvm::MemoryBufferRef PredefinesBuffer; ///< The preprocessor predefines. 420b57cec5SDimitry Andric bool ShowLineMarkers; ///< Show #line markers. 430b57cec5SDimitry Andric bool UseLineDirectives; ///< Use of line directives or line markers. 440b57cec5SDimitry Andric /// Tracks where inclusions that change the file are found. 45e8d8bef9SDimitry Andric std::map<SourceLocation, IncludedFile> FileIncludes; 460b57cec5SDimitry Andric /// Tracks where inclusions that import modules are found. 47e8d8bef9SDimitry Andric std::map<SourceLocation, const Module *> ModuleIncludes; 480b57cec5SDimitry Andric /// Tracks where inclusions that enter modules (in a module build) are found. 49e8d8bef9SDimitry Andric std::map<SourceLocation, const Module *> ModuleEntryIncludes; 50a7dea167SDimitry Andric /// Tracks where #if and #elif directives get evaluated and whether to true. 51e8d8bef9SDimitry Andric std::map<SourceLocation, bool> IfConditions; 520b57cec5SDimitry Andric /// Used transitively for building up the FileIncludes mapping over the 530b57cec5SDimitry Andric /// various \c PPCallbacks callbacks. 540b57cec5SDimitry Andric SourceLocation LastInclusionLocation; 550b57cec5SDimitry Andric public: 560b57cec5SDimitry Andric InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers, 570b57cec5SDimitry Andric bool UseLineDirectives); 5804eeddc0SDimitry Andric void Process(FileID FileId, SrcMgr::CharacteristicKind FileType); 59e8d8bef9SDimitry Andric void setPredefinesBuffer(const llvm::MemoryBufferRef &Buf) { 600b57cec5SDimitry Andric PredefinesBuffer = Buf; 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric void detectMainFileEOL(); 630b57cec5SDimitry Andric void handleModuleBegin(Token &Tok) { 640b57cec5SDimitry Andric assert(Tok.getKind() == tok::annot_module_begin); 65e8d8bef9SDimitry Andric ModuleEntryIncludes.insert( 66e8d8bef9SDimitry Andric {Tok.getLocation(), (Module *)Tok.getAnnotationValue()}); 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric private: 690b57cec5SDimitry Andric void FileChanged(SourceLocation Loc, FileChangeReason Reason, 700b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType, 710b57cec5SDimitry Andric FileID PrevFID) override; 72a7dea167SDimitry Andric void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok, 730b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType) override; 740b57cec5SDimitry Andric void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 750b57cec5SDimitry Andric StringRef FileName, bool IsAngled, 7681ad6265SDimitry Andric CharSourceRange FilenameRange, 77bdd1243dSDimitry Andric OptionalFileEntryRef File, StringRef SearchPath, 78*0fca6ea1SDimitry Andric StringRef RelativePath, const Module *SuggestedModule, 79*0fca6ea1SDimitry Andric bool ModuleImported, 800b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType) override; 81a7dea167SDimitry Andric void If(SourceLocation Loc, SourceRange ConditionRange, 82a7dea167SDimitry Andric ConditionValueKind ConditionValue) override; 83a7dea167SDimitry Andric void Elif(SourceLocation Loc, SourceRange ConditionRange, 84a7dea167SDimitry Andric ConditionValueKind ConditionValue, SourceLocation IfLoc) override; 850b57cec5SDimitry Andric void WriteLineInfo(StringRef Filename, int Line, 860b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType, 870b57cec5SDimitry Andric StringRef Extra = StringRef()); 880b57cec5SDimitry Andric void WriteImplicitModuleImport(const Module *Mod); 89e8d8bef9SDimitry Andric void OutputContentUpTo(const MemoryBufferRef &FromFile, unsigned &WriteFrom, 90e8d8bef9SDimitry Andric unsigned WriteTo, StringRef EOL, int &lines, 910b57cec5SDimitry Andric bool EnsureNewline); 920b57cec5SDimitry Andric void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, 93e8d8bef9SDimitry Andric const MemoryBufferRef &FromFile, StringRef EOL, 945f757f3fSDimitry Andric unsigned &NextToWrite, int &Lines, 955f757f3fSDimitry Andric const IncludedFile *Inc = nullptr); 960b57cec5SDimitry Andric const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const; 975f757f3fSDimitry Andric StringRef getIncludedFileName(const IncludedFile *Inc) const; 980b57cec5SDimitry Andric const Module *FindModuleAtLocation(SourceLocation Loc) const; 990b57cec5SDimitry Andric const Module *FindEnteredModule(SourceLocation Loc) const; 100a7dea167SDimitry Andric bool IsIfAtLocationTrue(SourceLocation Loc) const; 1010b57cec5SDimitry Andric StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); 1020b57cec5SDimitry Andric }; 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric } // end anonymous namespace 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric /// Initializes an InclusionRewriter with a \p PP source and \p OS destination. 1070b57cec5SDimitry Andric InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, 1080b57cec5SDimitry Andric bool ShowLineMarkers, 1090b57cec5SDimitry Andric bool UseLineDirectives) 1100b57cec5SDimitry Andric : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"), 111e8d8bef9SDimitry Andric ShowLineMarkers(ShowLineMarkers), UseLineDirectives(UseLineDirectives), 1120b57cec5SDimitry Andric LastInclusionLocation(SourceLocation()) {} 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric /// Write appropriate line information as either #line directives or GNU line 1150b57cec5SDimitry Andric /// markers depending on what mode we're in, including the \p Filename and 1160b57cec5SDimitry Andric /// \p Line we are located at, using the specified \p EOL line separator, and 1170b57cec5SDimitry Andric /// any \p Extra context specifiers in GNU line directives. 1180b57cec5SDimitry Andric void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line, 1190b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType, 1200b57cec5SDimitry Andric StringRef Extra) { 1210b57cec5SDimitry Andric if (!ShowLineMarkers) 1220b57cec5SDimitry Andric return; 1230b57cec5SDimitry Andric if (UseLineDirectives) { 1240b57cec5SDimitry Andric OS << "#line" << ' ' << Line << ' ' << '"'; 1250b57cec5SDimitry Andric OS.write_escaped(Filename); 1260b57cec5SDimitry Andric OS << '"'; 1270b57cec5SDimitry Andric } else { 1280b57cec5SDimitry Andric // Use GNU linemarkers as described here: 1290b57cec5SDimitry Andric // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html 1300b57cec5SDimitry Andric OS << '#' << ' ' << Line << ' ' << '"'; 1310b57cec5SDimitry Andric OS.write_escaped(Filename); 1320b57cec5SDimitry Andric OS << '"'; 1330b57cec5SDimitry Andric if (!Extra.empty()) 1340b57cec5SDimitry Andric OS << Extra; 1350b57cec5SDimitry Andric if (FileType == SrcMgr::C_System) 1360b57cec5SDimitry Andric // "`3' This indicates that the following text comes from a system header 1370b57cec5SDimitry Andric // file, so certain warnings should be suppressed." 1380b57cec5SDimitry Andric OS << " 3"; 1390b57cec5SDimitry Andric else if (FileType == SrcMgr::C_ExternCSystem) 1400b57cec5SDimitry Andric // as above for `3', plus "`4' This indicates that the following text 1410b57cec5SDimitry Andric // should be treated as being wrapped in an implicit extern "C" block." 1420b57cec5SDimitry Andric OS << " 3 4"; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric OS << MainEOL; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) { 1480b57cec5SDimitry Andric OS << "#pragma clang module import " << Mod->getFullModuleName(true) 1490b57cec5SDimitry Andric << " /* clang -frewrite-includes: implicit import */" << MainEOL; 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric /// FileChanged - Whenever the preprocessor enters or exits a #include file 1530b57cec5SDimitry Andric /// it invokes this handler. 1540b57cec5SDimitry Andric void InclusionRewriter::FileChanged(SourceLocation Loc, 1550b57cec5SDimitry Andric FileChangeReason Reason, 1560b57cec5SDimitry Andric SrcMgr::CharacteristicKind NewFileType, 1570b57cec5SDimitry Andric FileID) { 1580b57cec5SDimitry Andric if (Reason != EnterFile) 1590b57cec5SDimitry Andric return; 1600b57cec5SDimitry Andric if (LastInclusionLocation.isInvalid()) 1610b57cec5SDimitry Andric // we didn't reach this file (eg: the main file) via an inclusion directive 1620b57cec5SDimitry Andric return; 1630b57cec5SDimitry Andric FileID Id = FullSourceLoc(Loc, SM).getFileID(); 1640b57cec5SDimitry Andric auto P = FileIncludes.insert( 16504eeddc0SDimitry Andric std::make_pair(LastInclusionLocation, IncludedFile(Id, NewFileType))); 1660b57cec5SDimitry Andric (void)P; 1670b57cec5SDimitry Andric assert(P.second && "Unexpected revisitation of the same include directive"); 1680b57cec5SDimitry Andric LastInclusionLocation = SourceLocation(); 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric /// Called whenever an inclusion is skipped due to canonical header protection 1720b57cec5SDimitry Andric /// macros. 173a7dea167SDimitry Andric void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/, 1740b57cec5SDimitry Andric const Token & /*FilenameTok*/, 1750b57cec5SDimitry Andric SrcMgr::CharacteristicKind /*FileType*/) { 1760b57cec5SDimitry Andric assert(LastInclusionLocation.isValid() && 1770b57cec5SDimitry Andric "A file, that wasn't found via an inclusion directive, was skipped"); 1780b57cec5SDimitry Andric LastInclusionLocation = SourceLocation(); 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric /// This should be called whenever the preprocessor encounters include 1820b57cec5SDimitry Andric /// directives. It does not say whether the file has been included, but it 1830b57cec5SDimitry Andric /// provides more information about the directive (hash location instead 1840b57cec5SDimitry Andric /// of location inside the included file). It is assumed that the matching 1850b57cec5SDimitry Andric /// FileChanged() or FileSkipped() is called after this (or neither is 1860b57cec5SDimitry Andric /// called if this #include results in an error or does not textually include 1870b57cec5SDimitry Andric /// anything). 188bdd1243dSDimitry Andric void InclusionRewriter::InclusionDirective( 189bdd1243dSDimitry Andric SourceLocation HashLoc, const Token & /*IncludeTok*/, 190bdd1243dSDimitry Andric StringRef /*FileName*/, bool /*IsAngled*/, 191bdd1243dSDimitry Andric CharSourceRange /*FilenameRange*/, OptionalFileEntryRef /*File*/, 192bdd1243dSDimitry Andric StringRef /*SearchPath*/, StringRef /*RelativePath*/, 193*0fca6ea1SDimitry Andric const Module *SuggestedModule, bool ModuleImported, 194*0fca6ea1SDimitry Andric SrcMgr::CharacteristicKind FileType) { 195*0fca6ea1SDimitry Andric if (ModuleImported) { 196*0fca6ea1SDimitry Andric auto P = ModuleIncludes.insert(std::make_pair(HashLoc, SuggestedModule)); 1970b57cec5SDimitry Andric (void)P; 1980b57cec5SDimitry Andric assert(P.second && "Unexpected revisitation of the same include directive"); 1990b57cec5SDimitry Andric } else 2000b57cec5SDimitry Andric LastInclusionLocation = HashLoc; 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric 203a7dea167SDimitry Andric void InclusionRewriter::If(SourceLocation Loc, SourceRange ConditionRange, 204a7dea167SDimitry Andric ConditionValueKind ConditionValue) { 205e8d8bef9SDimitry Andric auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True)); 206a7dea167SDimitry Andric (void)P; 207a7dea167SDimitry Andric assert(P.second && "Unexpected revisitation of the same if directive"); 208a7dea167SDimitry Andric } 209a7dea167SDimitry Andric 210a7dea167SDimitry Andric void InclusionRewriter::Elif(SourceLocation Loc, SourceRange ConditionRange, 211a7dea167SDimitry Andric ConditionValueKind ConditionValue, 212a7dea167SDimitry Andric SourceLocation IfLoc) { 213e8d8bef9SDimitry Andric auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True)); 214a7dea167SDimitry Andric (void)P; 215a7dea167SDimitry Andric assert(P.second && "Unexpected revisitation of the same elif directive"); 216a7dea167SDimitry Andric } 217a7dea167SDimitry Andric 2180b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in 2190b57cec5SDimitry Andric /// an inclusion directive) in the map of inclusion information, FileChanges. 2200b57cec5SDimitry Andric const InclusionRewriter::IncludedFile * 2210b57cec5SDimitry Andric InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const { 222e8d8bef9SDimitry Andric const auto I = FileIncludes.find(Loc); 2230b57cec5SDimitry Andric if (I != FileIncludes.end()) 2240b57cec5SDimitry Andric return &I->second; 2250b57cec5SDimitry Andric return nullptr; 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in 2290b57cec5SDimitry Andric /// an inclusion directive) in the map of module inclusion information. 2300b57cec5SDimitry Andric const Module * 2310b57cec5SDimitry Andric InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const { 232e8d8bef9SDimitry Andric const auto I = ModuleIncludes.find(Loc); 2330b57cec5SDimitry Andric if (I != ModuleIncludes.end()) 2340b57cec5SDimitry Andric return I->second; 2350b57cec5SDimitry Andric return nullptr; 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in 2390b57cec5SDimitry Andric /// an inclusion directive) in the map of module entry information. 2400b57cec5SDimitry Andric const Module * 2410b57cec5SDimitry Andric InclusionRewriter::FindEnteredModule(SourceLocation Loc) const { 242e8d8bef9SDimitry Andric const auto I = ModuleEntryIncludes.find(Loc); 2430b57cec5SDimitry Andric if (I != ModuleEntryIncludes.end()) 2440b57cec5SDimitry Andric return I->second; 2450b57cec5SDimitry Andric return nullptr; 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 248a7dea167SDimitry Andric bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const { 249e8d8bef9SDimitry Andric const auto I = IfConditions.find(Loc); 250a7dea167SDimitry Andric if (I != IfConditions.end()) 251a7dea167SDimitry Andric return I->second; 252a7dea167SDimitry Andric return false; 253a7dea167SDimitry Andric } 254a7dea167SDimitry Andric 2550b57cec5SDimitry Andric void InclusionRewriter::detectMainFileEOL() { 256bdd1243dSDimitry Andric std::optional<MemoryBufferRef> FromFile = 257bdd1243dSDimitry Andric *SM.getBufferOrNone(SM.getMainFileID()); 258e8d8bef9SDimitry Andric assert(FromFile); 259e8d8bef9SDimitry Andric if (!FromFile) 2600b57cec5SDimitry Andric return; // Should never happen, but whatever. 26104eeddc0SDimitry Andric MainEOL = FromFile->getBuffer().detectEOL(); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at 2650b57cec5SDimitry Andric /// \p WriteTo - 1. 266e8d8bef9SDimitry Andric void InclusionRewriter::OutputContentUpTo(const MemoryBufferRef &FromFile, 2670b57cec5SDimitry Andric unsigned &WriteFrom, unsigned WriteTo, 2680b57cec5SDimitry Andric StringRef LocalEOL, int &Line, 2690b57cec5SDimitry Andric bool EnsureNewline) { 2700b57cec5SDimitry Andric if (WriteTo <= WriteFrom) 2710b57cec5SDimitry Andric return; 272e8d8bef9SDimitry Andric if (FromFile == PredefinesBuffer) { 2730b57cec5SDimitry Andric // Ignore the #defines of the predefines buffer. 2740b57cec5SDimitry Andric WriteFrom = WriteTo; 2750b57cec5SDimitry Andric return; 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric // If we would output half of a line ending, advance one character to output 2790b57cec5SDimitry Andric // the whole line ending. All buffers are null terminated, so looking ahead 2800b57cec5SDimitry Andric // one byte is safe. 2810b57cec5SDimitry Andric if (LocalEOL.size() == 2 && 2820b57cec5SDimitry Andric LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] && 2830b57cec5SDimitry Andric LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0]) 2840b57cec5SDimitry Andric WriteTo++; 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom, 2870b57cec5SDimitry Andric WriteTo - WriteFrom); 288bdd1243dSDimitry Andric // count lines manually, it's faster than getPresumedLoc() 289bdd1243dSDimitry Andric Line += TextToWrite.count(LocalEOL); 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric if (MainEOL == LocalEOL) { 2920b57cec5SDimitry Andric OS << TextToWrite; 2930b57cec5SDimitry Andric } else { 2940b57cec5SDimitry Andric // Output the file one line at a time, rewriting the line endings as we go. 2950b57cec5SDimitry Andric StringRef Rest = TextToWrite; 2960b57cec5SDimitry Andric while (!Rest.empty()) { 297bdd1243dSDimitry Andric // Identify and output the next line excluding an EOL sequence if present. 298bdd1243dSDimitry Andric size_t Idx = Rest.find(LocalEOL); 299bdd1243dSDimitry Andric StringRef LineText = Rest.substr(0, Idx); 3000b57cec5SDimitry Andric OS << LineText; 301bdd1243dSDimitry Andric if (Idx != StringRef::npos) { 302bdd1243dSDimitry Andric // An EOL sequence was present, output the EOL sequence for the 303bdd1243dSDimitry Andric // main source file and skip past the local EOL sequence. 3040b57cec5SDimitry Andric OS << MainEOL; 305bdd1243dSDimitry Andric Idx += LocalEOL.size(); 3060b57cec5SDimitry Andric } 307bdd1243dSDimitry Andric // Strip the line just handled. If Idx is npos or matches the end of the 308bdd1243dSDimitry Andric // text, Rest will be set to an empty string and the loop will terminate. 309bdd1243dSDimitry Andric Rest = Rest.substr(Idx); 310bdd1243dSDimitry Andric } 311bdd1243dSDimitry Andric } 3125f757f3fSDimitry Andric if (EnsureNewline && !TextToWrite.ends_with(LocalEOL)) 3130b57cec5SDimitry Andric OS << MainEOL; 314bdd1243dSDimitry Andric 3150b57cec5SDimitry Andric WriteFrom = WriteTo; 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric 3185f757f3fSDimitry Andric StringRef 3195f757f3fSDimitry Andric InclusionRewriter::getIncludedFileName(const IncludedFile *Inc) const { 3205f757f3fSDimitry Andric if (Inc) { 3215f757f3fSDimitry Andric auto B = SM.getBufferOrNone(Inc->Id); 3225f757f3fSDimitry Andric assert(B && "Attempting to process invalid inclusion"); 3235f757f3fSDimitry Andric if (B) 3245f757f3fSDimitry Andric return llvm::sys::path::filename(B->getBufferIdentifier()); 3255f757f3fSDimitry Andric } 3265f757f3fSDimitry Andric return StringRef(); 3275f757f3fSDimitry Andric } 3285f757f3fSDimitry Andric 3290b57cec5SDimitry Andric /// Print characters from \p FromFile starting at \p NextToWrite up until the 3300b57cec5SDimitry Andric /// inclusion directive at \p StartToken, then print out the inclusion 3310b57cec5SDimitry Andric /// inclusion directive disabled by a #if directive, updating \p NextToWrite 3320b57cec5SDimitry Andric /// and \p Line to track the number of source lines visited and the progress 3330b57cec5SDimitry Andric /// through the \p FromFile buffer. 3340b57cec5SDimitry Andric void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, 3350b57cec5SDimitry Andric const Token &StartToken, 336e8d8bef9SDimitry Andric const MemoryBufferRef &FromFile, 3370b57cec5SDimitry Andric StringRef LocalEOL, 3385f757f3fSDimitry Andric unsigned &NextToWrite, int &Line, 3395f757f3fSDimitry Andric const IncludedFile *Inc) { 3400b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite, 3410b57cec5SDimitry Andric SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line, 3420b57cec5SDimitry Andric false); 3430b57cec5SDimitry Andric Token DirectiveToken; 3440b57cec5SDimitry Andric do { 3450b57cec5SDimitry Andric DirectiveLex.LexFromRawLexer(DirectiveToken); 3460b57cec5SDimitry Andric } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); 347e8d8bef9SDimitry Andric if (FromFile == PredefinesBuffer) { 3480b57cec5SDimitry Andric // OutputContentUpTo() would not output anything anyway. 3490b57cec5SDimitry Andric return; 3500b57cec5SDimitry Andric } 3515f757f3fSDimitry Andric if (Inc) { 3525f757f3fSDimitry Andric OS << "#if defined(__CLANG_REWRITTEN_INCLUDES) "; 3535f757f3fSDimitry Andric if (isSystem(Inc->FileType)) 3545f757f3fSDimitry Andric OS << "|| defined(__CLANG_REWRITTEN_SYSTEM_INCLUDES) "; 3555f757f3fSDimitry Andric OS << "/* " << getIncludedFileName(Inc); 3565f757f3fSDimitry Andric } else { 3575f757f3fSDimitry Andric OS << "#if 0 /*"; 3585f757f3fSDimitry Andric } 3595f757f3fSDimitry Andric OS << " expanded by -frewrite-includes */" << MainEOL; 3600b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite, 3610b57cec5SDimitry Andric SM.getFileOffset(DirectiveToken.getLocation()) + 3620b57cec5SDimitry Andric DirectiveToken.getLength(), 3630b57cec5SDimitry Andric LocalEOL, Line, true); 3645f757f3fSDimitry Andric OS << (Inc ? "#else /* " : "#endif /*") << getIncludedFileName(Inc) 3655f757f3fSDimitry Andric << " expanded by -frewrite-includes */" << MainEOL; 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric /// Find the next identifier in the pragma directive specified by \p RawToken. 3690b57cec5SDimitry Andric StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, 3700b57cec5SDimitry Andric Token &RawToken) { 3710b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken); 3720b57cec5SDimitry Andric if (RawToken.is(tok::raw_identifier)) 3730b57cec5SDimitry Andric PP.LookUpIdentifierInfo(RawToken); 3740b57cec5SDimitry Andric if (RawToken.is(tok::identifier)) 3750b57cec5SDimitry Andric return RawToken.getIdentifierInfo()->getName(); 3760b57cec5SDimitry Andric return StringRef(); 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it 3800b57cec5SDimitry Andric /// and including content of included files recursively. 3810b57cec5SDimitry Andric void InclusionRewriter::Process(FileID FileId, 38204eeddc0SDimitry Andric SrcMgr::CharacteristicKind FileType) { 383e8d8bef9SDimitry Andric MemoryBufferRef FromFile; 384e8d8bef9SDimitry Andric { 385e8d8bef9SDimitry Andric auto B = SM.getBufferOrNone(FileId); 386e8d8bef9SDimitry Andric assert(B && "Attempting to process invalid inclusion"); 387e8d8bef9SDimitry Andric if (B) 388e8d8bef9SDimitry Andric FromFile = *B; 389e8d8bef9SDimitry Andric } 3900b57cec5SDimitry Andric StringRef FileName = FromFile.getBufferIdentifier(); 391e8d8bef9SDimitry Andric Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts()); 3920b57cec5SDimitry Andric RawLex.SetCommentRetentionState(false); 3930b57cec5SDimitry Andric 39404eeddc0SDimitry Andric StringRef LocalEOL = FromFile.getBuffer().detectEOL(); 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric // Per the GNU docs: "1" indicates entering a new file. 3970b57cec5SDimitry Andric if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID()) 3980b57cec5SDimitry Andric WriteLineInfo(FileName, 1, FileType, ""); 3990b57cec5SDimitry Andric else 4000b57cec5SDimitry Andric WriteLineInfo(FileName, 1, FileType, " 1"); 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric if (SM.getFileIDSize(FileId) == 0) 4030b57cec5SDimitry Andric return; 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric // The next byte to be copied from the source file, which may be non-zero if 4060b57cec5SDimitry Andric // the lexer handled a BOM. 4070b57cec5SDimitry Andric unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation()); 4080b57cec5SDimitry Andric assert(SM.getLineNumber(FileId, NextToWrite) == 1); 4090b57cec5SDimitry Andric int Line = 1; // The current input file line number. 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric Token RawToken; 4120b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken); 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric // TODO: Consider adding a switch that strips possibly unimportant content, 4150b57cec5SDimitry Andric // such as comments, to reduce the size of repro files. 4160b57cec5SDimitry Andric while (RawToken.isNot(tok::eof)) { 4170b57cec5SDimitry Andric if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { 4180b57cec5SDimitry Andric RawLex.setParsingPreprocessorDirective(true); 4190b57cec5SDimitry Andric Token HashToken = RawToken; 4200b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken); 4210b57cec5SDimitry Andric if (RawToken.is(tok::raw_identifier)) 4220b57cec5SDimitry Andric PP.LookUpIdentifierInfo(RawToken); 4230b57cec5SDimitry Andric if (RawToken.getIdentifierInfo() != nullptr) { 4240b57cec5SDimitry Andric switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { 4250b57cec5SDimitry Andric case tok::pp_include: 4260b57cec5SDimitry Andric case tok::pp_include_next: 4270b57cec5SDimitry Andric case tok::pp_import: { 4285f757f3fSDimitry Andric SourceLocation Loc = HashToken.getLocation(); 4295f757f3fSDimitry Andric const IncludedFile *Inc = FindIncludeAtLocation(Loc); 4305f757f3fSDimitry Andric CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, 4315f757f3fSDimitry Andric NextToWrite, Line, Inc); 4320b57cec5SDimitry Andric if (FileId != PP.getPredefinesFileID()) 4330b57cec5SDimitry Andric WriteLineInfo(FileName, Line - 1, FileType, ""); 4340b57cec5SDimitry Andric StringRef LineInfoExtra; 4350b57cec5SDimitry Andric if (const Module *Mod = FindModuleAtLocation(Loc)) 4360b57cec5SDimitry Andric WriteImplicitModuleImport(Mod); 4375f757f3fSDimitry Andric else if (Inc) { 4380b57cec5SDimitry Andric const Module *Mod = FindEnteredModule(Loc); 4390b57cec5SDimitry Andric if (Mod) 4400b57cec5SDimitry Andric OS << "#pragma clang module begin " 4410b57cec5SDimitry Andric << Mod->getFullModuleName(true) << "\n"; 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric // Include and recursively process the file. 44404eeddc0SDimitry Andric Process(Inc->Id, Inc->FileType); 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric if (Mod) 4470b57cec5SDimitry Andric OS << "#pragma clang module end /*" 4480b57cec5SDimitry Andric << Mod->getFullModuleName(true) << "*/\n"; 4495f757f3fSDimitry Andric // There's no #include, therefore no #if, for -include files. 4505f757f3fSDimitry Andric if (FromFile != PredefinesBuffer) { 4515f757f3fSDimitry Andric OS << "#endif /* " << getIncludedFileName(Inc) 4525f757f3fSDimitry Andric << " expanded by -frewrite-includes */" << LocalEOL; 4535f757f3fSDimitry Andric } 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric // Add line marker to indicate we're returning from an included 4560b57cec5SDimitry Andric // file. 4570b57cec5SDimitry Andric LineInfoExtra = " 2"; 4580b57cec5SDimitry Andric } 4590b57cec5SDimitry Andric // fix up lineinfo (since commented out directive changed line 4600b57cec5SDimitry Andric // numbers) for inclusions that were skipped due to header guards 4610b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType, LineInfoExtra); 4620b57cec5SDimitry Andric break; 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric case tok::pp_pragma: { 4650b57cec5SDimitry Andric StringRef Identifier = NextIdentifierName(RawLex, RawToken); 4660b57cec5SDimitry Andric if (Identifier == "clang" || Identifier == "GCC") { 4670b57cec5SDimitry Andric if (NextIdentifierName(RawLex, RawToken) == "system_header") { 4680b57cec5SDimitry Andric // keep the directive in, commented out 4690b57cec5SDimitry Andric CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, 4700b57cec5SDimitry Andric NextToWrite, Line); 4710b57cec5SDimitry Andric // update our own type 4720b57cec5SDimitry Andric FileType = SM.getFileCharacteristic(RawToken.getLocation()); 4730b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric } else if (Identifier == "once") { 4760b57cec5SDimitry Andric // keep the directive in, commented out 4770b57cec5SDimitry Andric CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, 4780b57cec5SDimitry Andric NextToWrite, Line); 4790b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType); 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric break; 4820b57cec5SDimitry Andric } 4830b57cec5SDimitry Andric case tok::pp_if: 4840b57cec5SDimitry Andric case tok::pp_elif: { 4850b57cec5SDimitry Andric bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() == 4860b57cec5SDimitry Andric tok::pp_elif); 487a7dea167SDimitry Andric bool isTrue = IsIfAtLocationTrue(RawToken.getLocation()); 4880b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite, 489a7dea167SDimitry Andric SM.getFileOffset(HashToken.getLocation()), 490a7dea167SDimitry Andric LocalEOL, Line, /*EnsureNewline=*/true); 491a7dea167SDimitry Andric do { 492a7dea167SDimitry Andric RawLex.LexFromRawLexer(RawToken); 493a7dea167SDimitry Andric } while (!RawToken.is(tok::eod) && RawToken.isNot(tok::eof)); 494a7dea167SDimitry Andric // We need to disable the old condition, but that is tricky. 495a7dea167SDimitry Andric // Trying to comment it out can easily lead to comment nesting. 496a7dea167SDimitry Andric // So instead make the condition harmless by making it enclose 497a7dea167SDimitry Andric // and empty block. Moreover, put it itself inside an #if 0 block 498a7dea167SDimitry Andric // to disable it from getting evaluated (e.g. __has_include_next 499a7dea167SDimitry Andric // warns if used from the primary source file). 500a7dea167SDimitry Andric OS << "#if 0 /* disabled by -frewrite-includes */" << MainEOL; 5010b57cec5SDimitry Andric if (elif) { 502a7dea167SDimitry Andric OS << "#if 0" << MainEOL; 503a7dea167SDimitry Andric } 5040b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite, 5050b57cec5SDimitry Andric SM.getFileOffset(RawToken.getLocation()) + 5060b57cec5SDimitry Andric RawToken.getLength(), 5070b57cec5SDimitry Andric LocalEOL, Line, /*EnsureNewline=*/true); 508a7dea167SDimitry Andric // Close the empty block and the disabling block. 509a7dea167SDimitry Andric OS << "#endif" << MainEOL; 510a7dea167SDimitry Andric OS << "#endif /* disabled by -frewrite-includes */" << MainEOL; 511a7dea167SDimitry Andric OS << (elif ? "#elif " : "#if ") << (isTrue ? "1" : "0") 512a7dea167SDimitry Andric << " /* evaluated by -frewrite-includes */" << MainEOL; 5130b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType); 5140b57cec5SDimitry Andric break; 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric case tok::pp_endif: 5170b57cec5SDimitry Andric case tok::pp_else: { 5180b57cec5SDimitry Andric // We surround every #include by #if 0 to comment it out, but that 5190b57cec5SDimitry Andric // changes line numbers. These are fixed up right after that, but 5200b57cec5SDimitry Andric // the whole #include could be inside a preprocessor conditional 5210b57cec5SDimitry Andric // that is not processed. So it is necessary to fix the line 5220b57cec5SDimitry Andric // numbers one the next line after each #else/#endif as well. 5230b57cec5SDimitry Andric RawLex.SetKeepWhitespaceMode(true); 5240b57cec5SDimitry Andric do { 5250b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken); 5260b57cec5SDimitry Andric } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof)); 5270b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite, 5280b57cec5SDimitry Andric SM.getFileOffset(RawToken.getLocation()) + 5290b57cec5SDimitry Andric RawToken.getLength(), 5300b57cec5SDimitry Andric LocalEOL, Line, /*EnsureNewline=*/ true); 5310b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType); 5320b57cec5SDimitry Andric RawLex.SetKeepWhitespaceMode(false); 5330b57cec5SDimitry Andric break; 5340b57cec5SDimitry Andric } 5350b57cec5SDimitry Andric default: 5360b57cec5SDimitry Andric break; 5370b57cec5SDimitry Andric } 5380b57cec5SDimitry Andric } 5390b57cec5SDimitry Andric RawLex.setParsingPreprocessorDirective(false); 5400b57cec5SDimitry Andric } 5410b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken); 5420b57cec5SDimitry Andric } 5430b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite, 5440b57cec5SDimitry Andric SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL, 5450b57cec5SDimitry Andric Line, /*EnsureNewline=*/true); 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric /// InclusionRewriterInInput - Implement -frewrite-includes mode. 5490b57cec5SDimitry Andric void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, 5500b57cec5SDimitry Andric const PreprocessorOutputOptions &Opts) { 5510b57cec5SDimitry Andric SourceManager &SM = PP.getSourceManager(); 5520b57cec5SDimitry Andric InclusionRewriter *Rewrite = new InclusionRewriter( 5530b57cec5SDimitry Andric PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives); 5540b57cec5SDimitry Andric Rewrite->detectMainFileEOL(); 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite)); 5570b57cec5SDimitry Andric PP.IgnorePragmas(); 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric // First let the preprocessor process the entire file and call callbacks. 5600b57cec5SDimitry Andric // Callbacks will record which #include's were actually performed. 5610b57cec5SDimitry Andric PP.EnterMainSourceFile(); 5620b57cec5SDimitry Andric Token Tok; 5630b57cec5SDimitry Andric // Only preprocessor directives matter here, so disable macro expansion 5640b57cec5SDimitry Andric // everywhere else as an optimization. 5650b57cec5SDimitry Andric // TODO: It would be even faster if the preprocessor could be switched 5660b57cec5SDimitry Andric // to a mode where it would parse only preprocessor directives and comments, 5670b57cec5SDimitry Andric // nothing else matters for parsing or processing. 5680b57cec5SDimitry Andric PP.SetMacroExpansionOnlyInDirectives(); 5690b57cec5SDimitry Andric do { 5700b57cec5SDimitry Andric PP.Lex(Tok); 5710b57cec5SDimitry Andric if (Tok.is(tok::annot_module_begin)) 5720b57cec5SDimitry Andric Rewrite->handleModuleBegin(Tok); 5730b57cec5SDimitry Andric } while (Tok.isNot(tok::eof)); 574e8d8bef9SDimitry Andric Rewrite->setPredefinesBuffer(SM.getBufferOrFake(PP.getPredefinesFileID())); 57504eeddc0SDimitry Andric Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User); 57604eeddc0SDimitry Andric Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); 5770b57cec5SDimitry Andric OS->flush(); 5780b57cec5SDimitry Andric } 579