xref: /openbsd-src/gnu/llvm/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1e5dd7070Spatrick //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick //
9e5dd7070Spatrick // This code rewrites include invocations into their expansions.  This gives you
10e5dd7070Spatrick // a file with all included files merged into it.
11e5dd7070Spatrick //
12e5dd7070Spatrick //===----------------------------------------------------------------------===//
13e5dd7070Spatrick 
14e5dd7070Spatrick #include "clang/Rewrite/Frontend/Rewriters.h"
15e5dd7070Spatrick #include "clang/Basic/SourceManager.h"
16e5dd7070Spatrick #include "clang/Frontend/PreprocessorOutputOptions.h"
17e5dd7070Spatrick #include "clang/Lex/Pragma.h"
18e5dd7070Spatrick #include "clang/Lex/Preprocessor.h"
19e5dd7070Spatrick #include "llvm/ADT/SmallString.h"
20e5dd7070Spatrick #include "llvm/Support/raw_ostream.h"
21*12c85518Srobert #include <optional>
22e5dd7070Spatrick 
23e5dd7070Spatrick using namespace clang;
24e5dd7070Spatrick using namespace llvm;
25e5dd7070Spatrick 
26e5dd7070Spatrick namespace {
27e5dd7070Spatrick 
28e5dd7070Spatrick class InclusionRewriter : public PPCallbacks {
29e5dd7070Spatrick   /// Information about which #includes were actually performed,
30e5dd7070Spatrick   /// created by preprocessor callbacks.
31e5dd7070Spatrick   struct IncludedFile {
32e5dd7070Spatrick     FileID Id;
33e5dd7070Spatrick     SrcMgr::CharacteristicKind FileType;
IncludedFile__anone0546db90111::InclusionRewriter::IncludedFile34*12c85518Srobert     IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType)
35*12c85518Srobert         : Id(Id), FileType(FileType) {}
36e5dd7070Spatrick   };
37e5dd7070Spatrick   Preprocessor &PP; ///< Used to find inclusion directives.
38e5dd7070Spatrick   SourceManager &SM; ///< Used to read and manage source files.
39e5dd7070Spatrick   raw_ostream &OS; ///< The destination stream for rewritten contents.
40e5dd7070Spatrick   StringRef MainEOL; ///< The line ending marker to use.
41a9ac8606Spatrick   llvm::MemoryBufferRef PredefinesBuffer; ///< The preprocessor predefines.
42e5dd7070Spatrick   bool ShowLineMarkers; ///< Show #line markers.
43e5dd7070Spatrick   bool UseLineDirectives; ///< Use of line directives or line markers.
44e5dd7070Spatrick   /// Tracks where inclusions that change the file are found.
45a9ac8606Spatrick   std::map<SourceLocation, IncludedFile> FileIncludes;
46e5dd7070Spatrick   /// Tracks where inclusions that import modules are found.
47a9ac8606Spatrick   std::map<SourceLocation, const Module *> ModuleIncludes;
48e5dd7070Spatrick   /// Tracks where inclusions that enter modules (in a module build) are found.
49a9ac8606Spatrick   std::map<SourceLocation, const Module *> ModuleEntryIncludes;
50e5dd7070Spatrick   /// Tracks where #if and #elif directives get evaluated and whether to true.
51a9ac8606Spatrick   std::map<SourceLocation, bool> IfConditions;
52e5dd7070Spatrick   /// Used transitively for building up the FileIncludes mapping over the
53e5dd7070Spatrick   /// various \c PPCallbacks callbacks.
54e5dd7070Spatrick   SourceLocation LastInclusionLocation;
55e5dd7070Spatrick public:
56e5dd7070Spatrick   InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,
57e5dd7070Spatrick                     bool UseLineDirectives);
58*12c85518Srobert   void Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
setPredefinesBuffer(const llvm::MemoryBufferRef & Buf)59a9ac8606Spatrick   void setPredefinesBuffer(const llvm::MemoryBufferRef &Buf) {
60e5dd7070Spatrick     PredefinesBuffer = Buf;
61e5dd7070Spatrick   }
62e5dd7070Spatrick   void detectMainFileEOL();
handleModuleBegin(Token & Tok)63e5dd7070Spatrick   void handleModuleBegin(Token &Tok) {
64e5dd7070Spatrick     assert(Tok.getKind() == tok::annot_module_begin);
65a9ac8606Spatrick     ModuleEntryIncludes.insert(
66a9ac8606Spatrick         {Tok.getLocation(), (Module *)Tok.getAnnotationValue()});
67e5dd7070Spatrick   }
68e5dd7070Spatrick private:
69e5dd7070Spatrick   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
70e5dd7070Spatrick                    SrcMgr::CharacteristicKind FileType,
71e5dd7070Spatrick                    FileID PrevFID) override;
72e5dd7070Spatrick   void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
73e5dd7070Spatrick                    SrcMgr::CharacteristicKind FileType) override;
74e5dd7070Spatrick   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
75e5dd7070Spatrick                           StringRef FileName, bool IsAngled,
76*12c85518Srobert                           CharSourceRange FilenameRange,
77*12c85518Srobert                           OptionalFileEntryRef File, StringRef SearchPath,
78*12c85518Srobert                           StringRef RelativePath, const Module *Imported,
79e5dd7070Spatrick                           SrcMgr::CharacteristicKind FileType) override;
80e5dd7070Spatrick   void If(SourceLocation Loc, SourceRange ConditionRange,
81e5dd7070Spatrick           ConditionValueKind ConditionValue) override;
82e5dd7070Spatrick   void Elif(SourceLocation Loc, SourceRange ConditionRange,
83e5dd7070Spatrick             ConditionValueKind ConditionValue, SourceLocation IfLoc) override;
84e5dd7070Spatrick   void WriteLineInfo(StringRef Filename, int Line,
85e5dd7070Spatrick                      SrcMgr::CharacteristicKind FileType,
86e5dd7070Spatrick                      StringRef Extra = StringRef());
87e5dd7070Spatrick   void WriteImplicitModuleImport(const Module *Mod);
88a9ac8606Spatrick   void OutputContentUpTo(const MemoryBufferRef &FromFile, unsigned &WriteFrom,
89a9ac8606Spatrick                          unsigned WriteTo, StringRef EOL, int &lines,
90e5dd7070Spatrick                          bool EnsureNewline);
91e5dd7070Spatrick   void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
92a9ac8606Spatrick                            const MemoryBufferRef &FromFile, StringRef EOL,
93e5dd7070Spatrick                            unsigned &NextToWrite, int &Lines);
94e5dd7070Spatrick   const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const;
95e5dd7070Spatrick   const Module *FindModuleAtLocation(SourceLocation Loc) const;
96e5dd7070Spatrick   const Module *FindEnteredModule(SourceLocation Loc) const;
97e5dd7070Spatrick   bool IsIfAtLocationTrue(SourceLocation Loc) const;
98e5dd7070Spatrick   StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
99e5dd7070Spatrick };
100e5dd7070Spatrick 
101e5dd7070Spatrick }  // end anonymous namespace
102e5dd7070Spatrick 
103e5dd7070Spatrick /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
InclusionRewriter(Preprocessor & PP,raw_ostream & OS,bool ShowLineMarkers,bool UseLineDirectives)104e5dd7070Spatrick InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
105e5dd7070Spatrick                                      bool ShowLineMarkers,
106e5dd7070Spatrick                                      bool UseLineDirectives)
107e5dd7070Spatrick     : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
108a9ac8606Spatrick       ShowLineMarkers(ShowLineMarkers), UseLineDirectives(UseLineDirectives),
109e5dd7070Spatrick       LastInclusionLocation(SourceLocation()) {}
110e5dd7070Spatrick 
111e5dd7070Spatrick /// Write appropriate line information as either #line directives or GNU line
112e5dd7070Spatrick /// markers depending on what mode we're in, including the \p Filename and
113e5dd7070Spatrick /// \p Line we are located at, using the specified \p EOL line separator, and
114e5dd7070Spatrick /// any \p Extra context specifiers in GNU line directives.
WriteLineInfo(StringRef Filename,int Line,SrcMgr::CharacteristicKind FileType,StringRef Extra)115e5dd7070Spatrick void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line,
116e5dd7070Spatrick                                       SrcMgr::CharacteristicKind FileType,
117e5dd7070Spatrick                                       StringRef Extra) {
118e5dd7070Spatrick   if (!ShowLineMarkers)
119e5dd7070Spatrick     return;
120e5dd7070Spatrick   if (UseLineDirectives) {
121e5dd7070Spatrick     OS << "#line" << ' ' << Line << ' ' << '"';
122e5dd7070Spatrick     OS.write_escaped(Filename);
123e5dd7070Spatrick     OS << '"';
124e5dd7070Spatrick   } else {
125e5dd7070Spatrick     // Use GNU linemarkers as described here:
126e5dd7070Spatrick     // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
127e5dd7070Spatrick     OS << '#' << ' ' << Line << ' ' << '"';
128e5dd7070Spatrick     OS.write_escaped(Filename);
129e5dd7070Spatrick     OS << '"';
130e5dd7070Spatrick     if (!Extra.empty())
131e5dd7070Spatrick       OS << Extra;
132e5dd7070Spatrick     if (FileType == SrcMgr::C_System)
133e5dd7070Spatrick       // "`3' This indicates that the following text comes from a system header
134e5dd7070Spatrick       // file, so certain warnings should be suppressed."
135e5dd7070Spatrick       OS << " 3";
136e5dd7070Spatrick     else if (FileType == SrcMgr::C_ExternCSystem)
137e5dd7070Spatrick       // as above for `3', plus "`4' This indicates that the following text
138e5dd7070Spatrick       // should be treated as being wrapped in an implicit extern "C" block."
139e5dd7070Spatrick       OS << " 3 4";
140e5dd7070Spatrick   }
141e5dd7070Spatrick   OS << MainEOL;
142e5dd7070Spatrick }
143e5dd7070Spatrick 
WriteImplicitModuleImport(const Module * Mod)144e5dd7070Spatrick void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
145e5dd7070Spatrick   OS << "#pragma clang module import " << Mod->getFullModuleName(true)
146e5dd7070Spatrick      << " /* clang -frewrite-includes: implicit import */" << MainEOL;
147e5dd7070Spatrick }
148e5dd7070Spatrick 
149e5dd7070Spatrick /// FileChanged - Whenever the preprocessor enters or exits a #include file
150e5dd7070Spatrick /// it invokes this handler.
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind NewFileType,FileID)151e5dd7070Spatrick void InclusionRewriter::FileChanged(SourceLocation Loc,
152e5dd7070Spatrick                                     FileChangeReason Reason,
153e5dd7070Spatrick                                     SrcMgr::CharacteristicKind NewFileType,
154e5dd7070Spatrick                                     FileID) {
155e5dd7070Spatrick   if (Reason != EnterFile)
156e5dd7070Spatrick     return;
157e5dd7070Spatrick   if (LastInclusionLocation.isInvalid())
158e5dd7070Spatrick     // we didn't reach this file (eg: the main file) via an inclusion directive
159e5dd7070Spatrick     return;
160e5dd7070Spatrick   FileID Id = FullSourceLoc(Loc, SM).getFileID();
161e5dd7070Spatrick   auto P = FileIncludes.insert(
162*12c85518Srobert       std::make_pair(LastInclusionLocation, IncludedFile(Id, NewFileType)));
163e5dd7070Spatrick   (void)P;
164e5dd7070Spatrick   assert(P.second && "Unexpected revisitation of the same include directive");
165e5dd7070Spatrick   LastInclusionLocation = SourceLocation();
166e5dd7070Spatrick }
167e5dd7070Spatrick 
168e5dd7070Spatrick /// Called whenever an inclusion is skipped due to canonical header protection
169e5dd7070Spatrick /// macros.
FileSkipped(const FileEntryRef &,const Token &,SrcMgr::CharacteristicKind)170e5dd7070Spatrick void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
171e5dd7070Spatrick                                     const Token & /*FilenameTok*/,
172e5dd7070Spatrick                                     SrcMgr::CharacteristicKind /*FileType*/) {
173e5dd7070Spatrick   assert(LastInclusionLocation.isValid() &&
174e5dd7070Spatrick          "A file, that wasn't found via an inclusion directive, was skipped");
175e5dd7070Spatrick   LastInclusionLocation = SourceLocation();
176e5dd7070Spatrick }
177e5dd7070Spatrick 
178e5dd7070Spatrick /// This should be called whenever the preprocessor encounters include
179e5dd7070Spatrick /// directives. It does not say whether the file has been included, but it
180e5dd7070Spatrick /// provides more information about the directive (hash location instead
181e5dd7070Spatrick /// of location inside the included file). It is assumed that the matching
182e5dd7070Spatrick /// FileChanged() or FileSkipped() is called after this (or neither is
183e5dd7070Spatrick /// called if this #include results in an error or does not textually include
184e5dd7070Spatrick /// anything).
InclusionDirective(SourceLocation HashLoc,const Token &,StringRef,bool,CharSourceRange,OptionalFileEntryRef,StringRef,StringRef,const Module * Imported,SrcMgr::CharacteristicKind FileType)185*12c85518Srobert void InclusionRewriter::InclusionDirective(
186*12c85518Srobert     SourceLocation HashLoc, const Token & /*IncludeTok*/,
187*12c85518Srobert     StringRef /*FileName*/, bool /*IsAngled*/,
188*12c85518Srobert     CharSourceRange /*FilenameRange*/, OptionalFileEntryRef /*File*/,
189*12c85518Srobert     StringRef /*SearchPath*/, StringRef /*RelativePath*/,
190*12c85518Srobert     const Module *Imported, SrcMgr::CharacteristicKind FileType) {
191e5dd7070Spatrick   if (Imported) {
192a9ac8606Spatrick     auto P = ModuleIncludes.insert(std::make_pair(HashLoc, Imported));
193e5dd7070Spatrick     (void)P;
194e5dd7070Spatrick     assert(P.second && "Unexpected revisitation of the same include directive");
195e5dd7070Spatrick   } else
196e5dd7070Spatrick     LastInclusionLocation = HashLoc;
197e5dd7070Spatrick }
198e5dd7070Spatrick 
If(SourceLocation Loc,SourceRange ConditionRange,ConditionValueKind ConditionValue)199e5dd7070Spatrick void InclusionRewriter::If(SourceLocation Loc, SourceRange ConditionRange,
200e5dd7070Spatrick                            ConditionValueKind ConditionValue) {
201a9ac8606Spatrick   auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
202e5dd7070Spatrick   (void)P;
203e5dd7070Spatrick   assert(P.second && "Unexpected revisitation of the same if directive");
204e5dd7070Spatrick }
205e5dd7070Spatrick 
Elif(SourceLocation Loc,SourceRange ConditionRange,ConditionValueKind ConditionValue,SourceLocation IfLoc)206e5dd7070Spatrick void InclusionRewriter::Elif(SourceLocation Loc, SourceRange ConditionRange,
207e5dd7070Spatrick                              ConditionValueKind ConditionValue,
208e5dd7070Spatrick                              SourceLocation IfLoc) {
209a9ac8606Spatrick   auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
210e5dd7070Spatrick   (void)P;
211e5dd7070Spatrick   assert(P.second && "Unexpected revisitation of the same elif directive");
212e5dd7070Spatrick }
213e5dd7070Spatrick 
214e5dd7070Spatrick /// Simple lookup for a SourceLocation (specifically one denoting the hash in
215e5dd7070Spatrick /// an inclusion directive) in the map of inclusion information, FileChanges.
216e5dd7070Spatrick const InclusionRewriter::IncludedFile *
FindIncludeAtLocation(SourceLocation Loc) const217e5dd7070Spatrick InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const {
218a9ac8606Spatrick   const auto I = FileIncludes.find(Loc);
219e5dd7070Spatrick   if (I != FileIncludes.end())
220e5dd7070Spatrick     return &I->second;
221e5dd7070Spatrick   return nullptr;
222e5dd7070Spatrick }
223e5dd7070Spatrick 
224e5dd7070Spatrick /// Simple lookup for a SourceLocation (specifically one denoting the hash in
225e5dd7070Spatrick /// an inclusion directive) in the map of module inclusion information.
226e5dd7070Spatrick const Module *
FindModuleAtLocation(SourceLocation Loc) const227e5dd7070Spatrick InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const {
228a9ac8606Spatrick   const auto I = ModuleIncludes.find(Loc);
229e5dd7070Spatrick   if (I != ModuleIncludes.end())
230e5dd7070Spatrick     return I->second;
231e5dd7070Spatrick   return nullptr;
232e5dd7070Spatrick }
233e5dd7070Spatrick 
234e5dd7070Spatrick /// Simple lookup for a SourceLocation (specifically one denoting the hash in
235e5dd7070Spatrick /// an inclusion directive) in the map of module entry information.
236e5dd7070Spatrick const Module *
FindEnteredModule(SourceLocation Loc) const237e5dd7070Spatrick InclusionRewriter::FindEnteredModule(SourceLocation Loc) const {
238a9ac8606Spatrick   const auto I = ModuleEntryIncludes.find(Loc);
239e5dd7070Spatrick   if (I != ModuleEntryIncludes.end())
240e5dd7070Spatrick     return I->second;
241e5dd7070Spatrick   return nullptr;
242e5dd7070Spatrick }
243e5dd7070Spatrick 
IsIfAtLocationTrue(SourceLocation Loc) const244e5dd7070Spatrick bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const {
245a9ac8606Spatrick   const auto I = IfConditions.find(Loc);
246e5dd7070Spatrick   if (I != IfConditions.end())
247e5dd7070Spatrick     return I->second;
248e5dd7070Spatrick   return false;
249e5dd7070Spatrick }
250e5dd7070Spatrick 
detectMainFileEOL()251e5dd7070Spatrick void InclusionRewriter::detectMainFileEOL() {
252*12c85518Srobert   std::optional<MemoryBufferRef> FromFile =
253*12c85518Srobert       *SM.getBufferOrNone(SM.getMainFileID());
254a9ac8606Spatrick   assert(FromFile);
255a9ac8606Spatrick   if (!FromFile)
256e5dd7070Spatrick     return; // Should never happen, but whatever.
257*12c85518Srobert   MainEOL = FromFile->getBuffer().detectEOL();
258e5dd7070Spatrick }
259e5dd7070Spatrick 
260e5dd7070Spatrick /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
261e5dd7070Spatrick /// \p WriteTo - 1.
OutputContentUpTo(const MemoryBufferRef & FromFile,unsigned & WriteFrom,unsigned WriteTo,StringRef LocalEOL,int & Line,bool EnsureNewline)262a9ac8606Spatrick void InclusionRewriter::OutputContentUpTo(const MemoryBufferRef &FromFile,
263e5dd7070Spatrick                                           unsigned &WriteFrom, unsigned WriteTo,
264e5dd7070Spatrick                                           StringRef LocalEOL, int &Line,
265e5dd7070Spatrick                                           bool EnsureNewline) {
266e5dd7070Spatrick   if (WriteTo <= WriteFrom)
267e5dd7070Spatrick     return;
268a9ac8606Spatrick   if (FromFile == PredefinesBuffer) {
269e5dd7070Spatrick     // Ignore the #defines of the predefines buffer.
270e5dd7070Spatrick     WriteFrom = WriteTo;
271e5dd7070Spatrick     return;
272e5dd7070Spatrick   }
273e5dd7070Spatrick 
274e5dd7070Spatrick   // If we would output half of a line ending, advance one character to output
275e5dd7070Spatrick   // the whole line ending.  All buffers are null terminated, so looking ahead
276e5dd7070Spatrick   // one byte is safe.
277e5dd7070Spatrick   if (LocalEOL.size() == 2 &&
278e5dd7070Spatrick       LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&
279e5dd7070Spatrick       LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])
280e5dd7070Spatrick     WriteTo++;
281e5dd7070Spatrick 
282e5dd7070Spatrick   StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,
283e5dd7070Spatrick                         WriteTo - WriteFrom);
284*12c85518Srobert   // count lines manually, it's faster than getPresumedLoc()
285*12c85518Srobert   Line += TextToWrite.count(LocalEOL);
286e5dd7070Spatrick 
287e5dd7070Spatrick   if (MainEOL == LocalEOL) {
288e5dd7070Spatrick     OS << TextToWrite;
289e5dd7070Spatrick   } else {
290e5dd7070Spatrick     // Output the file one line at a time, rewriting the line endings as we go.
291e5dd7070Spatrick     StringRef Rest = TextToWrite;
292e5dd7070Spatrick     while (!Rest.empty()) {
293*12c85518Srobert       // Identify and output the next line excluding an EOL sequence if present.
294*12c85518Srobert       size_t Idx = Rest.find(LocalEOL);
295*12c85518Srobert       StringRef LineText = Rest.substr(0, Idx);
296e5dd7070Spatrick       OS << LineText;
297*12c85518Srobert       if (Idx != StringRef::npos) {
298*12c85518Srobert         // An EOL sequence was present, output the EOL sequence for the
299*12c85518Srobert         // main source file and skip past the local EOL sequence.
300e5dd7070Spatrick         OS << MainEOL;
301*12c85518Srobert         Idx += LocalEOL.size();
302e5dd7070Spatrick       }
303*12c85518Srobert       // Strip the line just handled. If Idx is npos or matches the end of the
304*12c85518Srobert       // text, Rest will be set to an empty string and the loop will terminate.
305*12c85518Srobert       Rest = Rest.substr(Idx);
306*12c85518Srobert     }
307*12c85518Srobert   }
308*12c85518Srobert   if (EnsureNewline && !TextToWrite.endswith(LocalEOL))
309e5dd7070Spatrick     OS << MainEOL;
310*12c85518Srobert 
311e5dd7070Spatrick   WriteFrom = WriteTo;
312e5dd7070Spatrick }
313e5dd7070Spatrick 
314e5dd7070Spatrick /// Print characters from \p FromFile starting at \p NextToWrite up until the
315e5dd7070Spatrick /// inclusion directive at \p StartToken, then print out the inclusion
316e5dd7070Spatrick /// inclusion directive disabled by a #if directive, updating \p NextToWrite
317e5dd7070Spatrick /// and \p Line to track the number of source lines visited and the progress
318e5dd7070Spatrick /// through the \p FromFile buffer.
CommentOutDirective(Lexer & DirectiveLex,const Token & StartToken,const MemoryBufferRef & FromFile,StringRef LocalEOL,unsigned & NextToWrite,int & Line)319e5dd7070Spatrick void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
320e5dd7070Spatrick                                             const Token &StartToken,
321a9ac8606Spatrick                                             const MemoryBufferRef &FromFile,
322e5dd7070Spatrick                                             StringRef LocalEOL,
323e5dd7070Spatrick                                             unsigned &NextToWrite, int &Line) {
324e5dd7070Spatrick   OutputContentUpTo(FromFile, NextToWrite,
325e5dd7070Spatrick                     SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,
326e5dd7070Spatrick                     false);
327e5dd7070Spatrick   Token DirectiveToken;
328e5dd7070Spatrick   do {
329e5dd7070Spatrick     DirectiveLex.LexFromRawLexer(DirectiveToken);
330e5dd7070Spatrick   } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
331a9ac8606Spatrick   if (FromFile == PredefinesBuffer) {
332e5dd7070Spatrick     // OutputContentUpTo() would not output anything anyway.
333e5dd7070Spatrick     return;
334e5dd7070Spatrick   }
335e5dd7070Spatrick   OS << "#if 0 /* expanded by -frewrite-includes */" << MainEOL;
336e5dd7070Spatrick   OutputContentUpTo(FromFile, NextToWrite,
337e5dd7070Spatrick                     SM.getFileOffset(DirectiveToken.getLocation()) +
338e5dd7070Spatrick                         DirectiveToken.getLength(),
339e5dd7070Spatrick                     LocalEOL, Line, true);
340e5dd7070Spatrick   OS << "#endif /* expanded by -frewrite-includes */" << MainEOL;
341e5dd7070Spatrick }
342e5dd7070Spatrick 
343e5dd7070Spatrick /// Find the next identifier in the pragma directive specified by \p RawToken.
NextIdentifierName(Lexer & RawLex,Token & RawToken)344e5dd7070Spatrick StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
345e5dd7070Spatrick                                                 Token &RawToken) {
346e5dd7070Spatrick   RawLex.LexFromRawLexer(RawToken);
347e5dd7070Spatrick   if (RawToken.is(tok::raw_identifier))
348e5dd7070Spatrick     PP.LookUpIdentifierInfo(RawToken);
349e5dd7070Spatrick   if (RawToken.is(tok::identifier))
350e5dd7070Spatrick     return RawToken.getIdentifierInfo()->getName();
351e5dd7070Spatrick   return StringRef();
352e5dd7070Spatrick }
353e5dd7070Spatrick 
354e5dd7070Spatrick /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
355e5dd7070Spatrick /// and including content of included files recursively.
Process(FileID FileId,SrcMgr::CharacteristicKind FileType)356e5dd7070Spatrick void InclusionRewriter::Process(FileID FileId,
357*12c85518Srobert                                 SrcMgr::CharacteristicKind FileType) {
358a9ac8606Spatrick   MemoryBufferRef FromFile;
359a9ac8606Spatrick   {
360a9ac8606Spatrick     auto B = SM.getBufferOrNone(FileId);
361a9ac8606Spatrick     assert(B && "Attempting to process invalid inclusion");
362a9ac8606Spatrick     if (B)
363a9ac8606Spatrick       FromFile = *B;
364a9ac8606Spatrick   }
365e5dd7070Spatrick   StringRef FileName = FromFile.getBufferIdentifier();
366a9ac8606Spatrick   Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());
367e5dd7070Spatrick   RawLex.SetCommentRetentionState(false);
368e5dd7070Spatrick 
369*12c85518Srobert   StringRef LocalEOL = FromFile.getBuffer().detectEOL();
370e5dd7070Spatrick 
371e5dd7070Spatrick   // Per the GNU docs: "1" indicates entering a new file.
372e5dd7070Spatrick   if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
373e5dd7070Spatrick     WriteLineInfo(FileName, 1, FileType, "");
374e5dd7070Spatrick   else
375e5dd7070Spatrick     WriteLineInfo(FileName, 1, FileType, " 1");
376e5dd7070Spatrick 
377e5dd7070Spatrick   if (SM.getFileIDSize(FileId) == 0)
378e5dd7070Spatrick     return;
379e5dd7070Spatrick 
380e5dd7070Spatrick   // The next byte to be copied from the source file, which may be non-zero if
381e5dd7070Spatrick   // the lexer handled a BOM.
382e5dd7070Spatrick   unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
383e5dd7070Spatrick   assert(SM.getLineNumber(FileId, NextToWrite) == 1);
384e5dd7070Spatrick   int Line = 1; // The current input file line number.
385e5dd7070Spatrick 
386e5dd7070Spatrick   Token RawToken;
387e5dd7070Spatrick   RawLex.LexFromRawLexer(RawToken);
388e5dd7070Spatrick 
389e5dd7070Spatrick   // TODO: Consider adding a switch that strips possibly unimportant content,
390e5dd7070Spatrick   // such as comments, to reduce the size of repro files.
391e5dd7070Spatrick   while (RawToken.isNot(tok::eof)) {
392e5dd7070Spatrick     if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
393e5dd7070Spatrick       RawLex.setParsingPreprocessorDirective(true);
394e5dd7070Spatrick       Token HashToken = RawToken;
395e5dd7070Spatrick       RawLex.LexFromRawLexer(RawToken);
396e5dd7070Spatrick       if (RawToken.is(tok::raw_identifier))
397e5dd7070Spatrick         PP.LookUpIdentifierInfo(RawToken);
398e5dd7070Spatrick       if (RawToken.getIdentifierInfo() != nullptr) {
399e5dd7070Spatrick         switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
400e5dd7070Spatrick           case tok::pp_include:
401e5dd7070Spatrick           case tok::pp_include_next:
402e5dd7070Spatrick           case tok::pp_import: {
403e5dd7070Spatrick             CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, NextToWrite,
404e5dd7070Spatrick               Line);
405e5dd7070Spatrick             if (FileId != PP.getPredefinesFileID())
406e5dd7070Spatrick               WriteLineInfo(FileName, Line - 1, FileType, "");
407e5dd7070Spatrick             StringRef LineInfoExtra;
408e5dd7070Spatrick             SourceLocation Loc = HashToken.getLocation();
409e5dd7070Spatrick             if (const Module *Mod = FindModuleAtLocation(Loc))
410e5dd7070Spatrick               WriteImplicitModuleImport(Mod);
411e5dd7070Spatrick             else if (const IncludedFile *Inc = FindIncludeAtLocation(Loc)) {
412e5dd7070Spatrick               const Module *Mod = FindEnteredModule(Loc);
413e5dd7070Spatrick               if (Mod)
414e5dd7070Spatrick                 OS << "#pragma clang module begin "
415e5dd7070Spatrick                    << Mod->getFullModuleName(true) << "\n";
416e5dd7070Spatrick 
417e5dd7070Spatrick               // Include and recursively process the file.
418*12c85518Srobert               Process(Inc->Id, Inc->FileType);
419e5dd7070Spatrick 
420e5dd7070Spatrick               if (Mod)
421e5dd7070Spatrick                 OS << "#pragma clang module end /*"
422e5dd7070Spatrick                    << Mod->getFullModuleName(true) << "*/\n";
423e5dd7070Spatrick 
424e5dd7070Spatrick               // Add line marker to indicate we're returning from an included
425e5dd7070Spatrick               // file.
426e5dd7070Spatrick               LineInfoExtra = " 2";
427e5dd7070Spatrick             }
428e5dd7070Spatrick             // fix up lineinfo (since commented out directive changed line
429e5dd7070Spatrick             // numbers) for inclusions that were skipped due to header guards
430e5dd7070Spatrick             WriteLineInfo(FileName, Line, FileType, LineInfoExtra);
431e5dd7070Spatrick             break;
432e5dd7070Spatrick           }
433e5dd7070Spatrick           case tok::pp_pragma: {
434e5dd7070Spatrick             StringRef Identifier = NextIdentifierName(RawLex, RawToken);
435e5dd7070Spatrick             if (Identifier == "clang" || Identifier == "GCC") {
436e5dd7070Spatrick               if (NextIdentifierName(RawLex, RawToken) == "system_header") {
437e5dd7070Spatrick                 // keep the directive in, commented out
438e5dd7070Spatrick                 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
439e5dd7070Spatrick                   NextToWrite, Line);
440e5dd7070Spatrick                 // update our own type
441e5dd7070Spatrick                 FileType = SM.getFileCharacteristic(RawToken.getLocation());
442e5dd7070Spatrick                 WriteLineInfo(FileName, Line, FileType);
443e5dd7070Spatrick               }
444e5dd7070Spatrick             } else if (Identifier == "once") {
445e5dd7070Spatrick               // keep the directive in, commented out
446e5dd7070Spatrick               CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
447e5dd7070Spatrick                 NextToWrite, Line);
448e5dd7070Spatrick               WriteLineInfo(FileName, Line, FileType);
449e5dd7070Spatrick             }
450e5dd7070Spatrick             break;
451e5dd7070Spatrick           }
452e5dd7070Spatrick           case tok::pp_if:
453e5dd7070Spatrick           case tok::pp_elif: {
454e5dd7070Spatrick             bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
455e5dd7070Spatrick                          tok::pp_elif);
456e5dd7070Spatrick             bool isTrue = IsIfAtLocationTrue(RawToken.getLocation());
457e5dd7070Spatrick             OutputContentUpTo(FromFile, NextToWrite,
458e5dd7070Spatrick                               SM.getFileOffset(HashToken.getLocation()),
459e5dd7070Spatrick                               LocalEOL, Line, /*EnsureNewline=*/true);
460e5dd7070Spatrick             do {
461e5dd7070Spatrick               RawLex.LexFromRawLexer(RawToken);
462e5dd7070Spatrick             } while (!RawToken.is(tok::eod) && RawToken.isNot(tok::eof));
463e5dd7070Spatrick             // We need to disable the old condition, but that is tricky.
464e5dd7070Spatrick             // Trying to comment it out can easily lead to comment nesting.
465e5dd7070Spatrick             // So instead make the condition harmless by making it enclose
466e5dd7070Spatrick             // an empty block. Moreover, put it itself inside an #if 0 block
467e5dd7070Spatrick             // to disable it from getting evaluated (e.g. __has_include_next
468e5dd7070Spatrick             // warns if used from the primary source file).
469e5dd7070Spatrick             OS << "#if 0 /* disabled by -frewrite-includes */" << MainEOL;
470e5dd7070Spatrick             if (elif) {
471e5dd7070Spatrick               OS << "#if 0" << MainEOL;
472e5dd7070Spatrick             }
473e5dd7070Spatrick             OutputContentUpTo(FromFile, NextToWrite,
474e5dd7070Spatrick                               SM.getFileOffset(RawToken.getLocation()) +
475e5dd7070Spatrick                                   RawToken.getLength(),
476e5dd7070Spatrick                               LocalEOL, Line, /*EnsureNewline=*/true);
477e5dd7070Spatrick             // Close the empty block and the disabling block.
478e5dd7070Spatrick             OS << "#endif" << MainEOL;
479e5dd7070Spatrick             OS << "#endif /* disabled by -frewrite-includes */" << MainEOL;
480e5dd7070Spatrick             OS << (elif ? "#elif " : "#if ") << (isTrue ? "1" : "0")
481e5dd7070Spatrick                << " /* evaluated by -frewrite-includes */" << MainEOL;
482e5dd7070Spatrick             WriteLineInfo(FileName, Line, FileType);
483e5dd7070Spatrick             break;
484e5dd7070Spatrick           }
485e5dd7070Spatrick           case tok::pp_endif:
486e5dd7070Spatrick           case tok::pp_else: {
487e5dd7070Spatrick             // We surround every #include by #if 0 to comment it out, but that
488e5dd7070Spatrick             // changes line numbers. These are fixed up right after that, but
489e5dd7070Spatrick             // the whole #include could be inside a preprocessor conditional
490e5dd7070Spatrick             // that is not processed. So it is necessary to fix the line
491e5dd7070Spatrick             // numbers on the next line after each #else/#endif as well.
492e5dd7070Spatrick             RawLex.SetKeepWhitespaceMode(true);
493e5dd7070Spatrick             do {
494e5dd7070Spatrick               RawLex.LexFromRawLexer(RawToken);
495e5dd7070Spatrick             } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
496e5dd7070Spatrick             OutputContentUpTo(FromFile, NextToWrite,
497e5dd7070Spatrick                               SM.getFileOffset(RawToken.getLocation()) +
498e5dd7070Spatrick                                   RawToken.getLength(),
499e5dd7070Spatrick                               LocalEOL, Line, /*EnsureNewline=*/ true);
500e5dd7070Spatrick             WriteLineInfo(FileName, Line, FileType);
501e5dd7070Spatrick             RawLex.SetKeepWhitespaceMode(false);
502e5dd7070Spatrick             break;
503e5dd7070Spatrick           }
504e5dd7070Spatrick           default:
505e5dd7070Spatrick             break;
506e5dd7070Spatrick         }
507e5dd7070Spatrick       }
508e5dd7070Spatrick       RawLex.setParsingPreprocessorDirective(false);
509e5dd7070Spatrick     }
510e5dd7070Spatrick     RawLex.LexFromRawLexer(RawToken);
511e5dd7070Spatrick   }
512e5dd7070Spatrick   OutputContentUpTo(FromFile, NextToWrite,
513e5dd7070Spatrick                     SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,
514e5dd7070Spatrick                     Line, /*EnsureNewline=*/true);
515e5dd7070Spatrick }
516e5dd7070Spatrick 
517e5dd7070Spatrick /// InclusionRewriterInInput - Implement -frewrite-includes mode.
RewriteIncludesInInput(Preprocessor & PP,raw_ostream * OS,const PreprocessorOutputOptions & Opts)518e5dd7070Spatrick void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
519e5dd7070Spatrick                                    const PreprocessorOutputOptions &Opts) {
520e5dd7070Spatrick   SourceManager &SM = PP.getSourceManager();
521e5dd7070Spatrick   InclusionRewriter *Rewrite = new InclusionRewriter(
522e5dd7070Spatrick       PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives);
523e5dd7070Spatrick   Rewrite->detectMainFileEOL();
524e5dd7070Spatrick 
525e5dd7070Spatrick   PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite));
526e5dd7070Spatrick   PP.IgnorePragmas();
527e5dd7070Spatrick 
528e5dd7070Spatrick   // First let the preprocessor process the entire file and call callbacks.
529e5dd7070Spatrick   // Callbacks will record which #include's were actually performed.
530e5dd7070Spatrick   PP.EnterMainSourceFile();
531e5dd7070Spatrick   Token Tok;
532e5dd7070Spatrick   // Only preprocessor directives matter here, so disable macro expansion
533e5dd7070Spatrick   // everywhere else as an optimization.
534e5dd7070Spatrick   // TODO: It would be even faster if the preprocessor could be switched
535e5dd7070Spatrick   // to a mode where it would parse only preprocessor directives and comments,
536e5dd7070Spatrick   // nothing else matters for parsing or processing.
537e5dd7070Spatrick   PP.SetMacroExpansionOnlyInDirectives();
538e5dd7070Spatrick   do {
539e5dd7070Spatrick     PP.Lex(Tok);
540e5dd7070Spatrick     if (Tok.is(tok::annot_module_begin))
541e5dd7070Spatrick       Rewrite->handleModuleBegin(Tok);
542e5dd7070Spatrick   } while (Tok.isNot(tok::eof));
543a9ac8606Spatrick   Rewrite->setPredefinesBuffer(SM.getBufferOrFake(PP.getPredefinesFileID()));
544*12c85518Srobert   Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
545*12c85518Srobert   Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
546e5dd7070Spatrick   OS->flush();
547e5dd7070Spatrick }
548